1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding, concrete VMReg );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
69
// Each general register below is declared as two allocator slots: NAME maps
// to the register's VMReg and NAME_H maps to that VMReg's ->next() slot.
// Both slots of a register carry the same encoding value.
// NOTE(review): the _H slot presumably models the upper half of the 64-bit
// register -- confirm.
// RAX, RCX and RDX are SOC in both save-type columns; RBX is SOC in the first
// column and SOE in the C convention column; RSP is NS in both.
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
// RBP is therefore NS in the first column and SOE in the C convention column.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
// RSI and RDI differ only in the C convention save type: SOE when _WIN64 is
// defined, SOC otherwise. The first (register save type) column is SOC in
// both variants, and the encodings (6 and 7) are the same.
// NOTE(review): the earlier remark "turn on RSI and RDI as SOE registers"
// does not match the SOC value in the first column here -- confirm whether
// that remark is stale.
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
// R8-R15, each as a NAME / NAME_H pair (VMReg and its ->next() slot) sharing
// one encoding (8-15).
// R8-R11 are SOC in both save-type columns.
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
// R12-R15 are SOC in the first column and SOE in the C convention column.
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
// R16-R31, each as a NAME / NAME_H pair (VMReg and its ->next() slot) sharing
// one encoding (16-31). All are SOC in both save-type columns.
// NOTE(review): these are presumably the APX extended general registers and
// would need an encoding form beyond the REX rule quoted for R8-R15 --
// confirm.
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
// chunk0 lists every general register defined above (RAX..R31), each
// immediately followed by its _H companion so that the two slots stay
// adjacent in the list. The order is: R10, R11, R8, R9, R12, then the legacy
// registers RCX, RBX, RDI, RDX, RSI, RAX, RBP, then R13-R31 in numeric order,
// with RSP placed last.
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No register preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
// XMM0-XMM7. Each register is declared as 16 allocator slots: the base name
// maps to the register's VMReg and the b..p suffixes map to
// ->next(1) .. ->next(15). All slots of one register share its encoding
// (0-7), are SOC in both save-type columns, and use ideal type Op_RegF.
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
// XMM8-XMM15. Same layout as the lower registers: 16 allocator slots per
// register (base name plus b..p, mapped to ->next(1) .. ->next(15)), one
// shared encoding per register (8-15), SOC in both save-type columns, ideal
// type Op_RegF.
// NOTE(review): the ABI remark earlier in this file says the Windows ABI
// preserves XMM6-XMM15 across calls, yet the C convention column here is SOC
// with no _WIN64 variant -- confirm that this is handled elsewhere.
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
// XMM20: sixteen Op_RegF slots (XMM20, XMM20b .. XMM20p) sharing encoding 20.
// The base slot is bound to xmm20->as_VMReg(); slot k to xmm20->as_VMReg()->next(k).
reg_def XMM20 (SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
// XMM21: sixteen Op_RegF slots (XMM21, XMM21b .. XMM21p) sharing encoding 21.
// The base slot is bound to xmm21->as_VMReg(); slot k to xmm21->as_VMReg()->next(k).
reg_def XMM21 (SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
// XMM22: sixteen Op_RegF slots (XMM22, XMM22b .. XMM22p) sharing encoding 22.
// The base slot is bound to xmm22->as_VMReg(); slot k to xmm22->as_VMReg()->next(k).
reg_def XMM22 (SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
// XMM23: sixteen Op_RegF slots (XMM23, XMM23b .. XMM23p) sharing encoding 23.
// The base slot is bound to xmm23->as_VMReg(); slot k to xmm23->as_VMReg()->next(k).
reg_def XMM23 (SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
// XMM24: sixteen Op_RegF slots (XMM24, XMM24b .. XMM24p) sharing encoding 24.
// The base slot is bound to xmm24->as_VMReg(); slot k to xmm24->as_VMReg()->next(k).
reg_def XMM24 (SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
// XMM25: sixteen Op_RegF slots (XMM25, XMM25b .. XMM25p) sharing encoding 25.
// The base slot is bound to xmm25->as_VMReg(); slot k to xmm25->as_VMReg()->next(k).
reg_def XMM25 (SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
// XMM26: sixteen Op_RegF slots (XMM26, XMM26b .. XMM26p) sharing encoding 26.
// The base slot is bound to xmm26->as_VMReg(); slot k to xmm26->as_VMReg()->next(k).
reg_def XMM26 (SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
// XMM27: sixteen Op_RegF slots (XMM27, XMM27b .. XMM27p) sharing encoding 27.
// The base slot is bound to xmm27->as_VMReg(); slot k to xmm27->as_VMReg()->next(k).
reg_def XMM27 (SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
// XMM28: sixteen Op_RegF slots (XMM28, XMM28b .. XMM28p) sharing encoding 28.
// The base slot is bound to xmm28->as_VMReg(); slot k to xmm28->as_VMReg()->next(k).
reg_def XMM28 (SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
// XMM29: sixteen Op_RegF slots (XMM29, XMM29b .. XMM29p) sharing encoding 29.
// The base slot is bound to xmm29->as_VMReg(); slot k to xmm29->as_VMReg()->next(k).
reg_def XMM29 (SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
// XMM30: sixteen Op_RegF slots (XMM30, XMM30b .. XMM30p) sharing encoding 30.
// The base slot is bound to xmm30->as_VMReg(); slot k to xmm30->as_VMReg()->next(k).
reg_def XMM30 (SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
// XMM31: sixteen Op_RegF slots (XMM31, XMM31b .. XMM31p) sharing encoding 31.
// The base slot is bound to xmm31->as_VMReg(); slot k to xmm31->as_VMReg()->next(k).
reg_def XMM31 (SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
// Flags register: save-on-call for both conventions, ideal register type 0,
// encoding 16, and no backing VMReg (VMRegImpl::Bad()).
reg_def RFLAGS (SOC, SOC, 0, 16, VMRegImpl::Bad());
782
// AVX3 Mask Registers K1 .. K7.
// Each mask register is described by two Op_RegI slots: Kn is bound to
// kn->as_VMReg() and Kn_H to kn->as_VMReg()->next(). Both share encoding n.
reg_def K1  (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H(SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2  (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H(SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3  (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H(SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4  (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H(SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5  (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H(SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6  (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H(SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7  (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H(SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
// Register class with no members.
reg_class no_reg( );
815
// Class for all pointer/long registers including APX extended GPRs.
// Every register is listed together with its _H companion slot; RSP and R15
// are members of this class.
reg_class all_reg(RAX, RAX_H,  RDX, RDX_H,  RBP, RBP_H,  RDI, RDI_H,
                  RSI, RSI_H,  RCX, RCX_H,  RBX, RBX_H,  RSP, RSP_H,
                  R8,  R8_H,   R9,  R9_H,   R10, R10_H,  R11, R11_H,
                  R12, R12_H,  R13, R13_H,  R14, R14_H,  R15, R15_H,
                  R16, R16_H,  R17, R17_H,  R18, R18_H,  R19, R19_H,
                  R20, R20_H,  R21, R21_H,  R22, R22_H,  R23, R23_H,
                  R24, R24_H,  R25, R25_H,  R26, R26_H,  R27, R27_H,
                  R28, R28_H,  R29, R29_H,  R30, R30_H,  R31, R31_H);
849
// Class for all int registers including APX extended GPRs.
// Only the low slot of each register is listed (no _H companions).
// Unlike all_reg, RSP and R15 are not members of this class.
// Every member is comma-separated, matching the other register lists in this
// file, so the list does not depend on the parser accepting a missing separator.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);
881
// Pointer register classes whose membership is not listed here but taken
// from a mask returned by the embedded code block.

// All pointer registers.
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// All pointer registers (excluding RSP).
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// All pointer registers (excluding RSP and RBP).
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// All pointer registers (excluding RAX and RSP).
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// All pointer registers (excluding RAX, RBX, and RSP).
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}
906
// Long register classes whose membership is taken from a mask returned by
// the embedded code block.

// All long registers (excluding RSP).
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// All long registers (excluding RAX, RDX and RSP).
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// All long registers (excluding RCX and RSP).
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// All long registers (excluding RBP and R13).
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}
926
// Int register classes whose membership is taken from a mask returned by
// the embedded code block.

// All int registers (excluding RSP).
reg_class int_reg %{
  return _INT_REG_mask;
%}

// All int registers (excluding RAX, RDX, and RSP).
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// All int registers (excluding RCX and RSP).
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// All int registers (excluding RBP and R13).
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
946
// Singleton pointer classes: each names one register plus its _H companion.

// RAX pointer register
reg_class ptr_rax_reg( RAX, RAX_H );

// RBX pointer register
reg_class ptr_rbx_reg( RBX, RBX_H );

// RSI pointer register
reg_class ptr_rsi_reg( RSI, RSI_H );

// RBP pointer register
reg_class ptr_rbp_reg( RBP, RBP_H );

// RDI pointer register
reg_class ptr_rdi_reg( RDI, RDI_H );

// Stack pointer
reg_class ptr_rsp_reg( RSP, RSP_H );

// TLS pointer
reg_class ptr_r15_reg( R15, R15_H );
967
// Singleton long classes: each names one register plus its _H companion.

// RAX long register
reg_class long_rax_reg( RAX, RAX_H );

// RCX long register
reg_class long_rcx_reg( RCX, RCX_H );

// RDX long register
reg_class long_rdx_reg( RDX, RDX_H );

// R11 long register
reg_class long_r11_reg( R11, R11_H );
979
// Singleton int classes: each names exactly one register slot.

// RAX int register
reg_class int_rax_reg( RAX );

// RBX int register
reg_class int_rbx_reg( RBX );

// RCX int register
reg_class int_rcx_reg( RCX );

// RDX int register
reg_class int_rdx_reg( RDX );

// RDI int register
reg_class int_rdi_reg( RDI );
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
// Allocation chunk holding every XMM slot: one row per register, XMM0 first
// and XMM31 last, each row listing the sixteen slots (base, b .. p) in order.
alloc_class chunk1(
    XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
// Allocation chunk for the mask registers, listed from K7 down to K1,
// each followed by its _H slot.
alloc_class chunk2(K7, K7_H,  K6, K6_H,  K5, K5_H,  K4, K4_H,
                   K3, K3_H,  K2, K2_H,  K1, K1_H);
1038
// Class for all mask registers K1 .. K7 (each with its _H slot).
reg_class vectmask_reg(K1, K1_H,  K2, K2_H,  K3, K3_H,  K4, K4_H,
                       K5, K5_H,  K6, K6_H,  K7, K7_H);
1046
// Singleton classes, one per mask register (Kn plus its _H slot).
reg_class vectmask_reg_K1( K1, K1_H );
reg_class vectmask_reg_K2( K2, K2_H );
reg_class vectmask_reg_K3( K3, K3_H );
reg_class vectmask_reg_K4( K4, K4_H );
reg_class vectmask_reg_K5( K5, K5_H );
reg_class vectmask_reg_K6( K6, K6_H );
reg_class vectmask_reg_K7( K7, K7_H );
1054
// Allocation chunk for RFLAGS only. Keep this as the last alloc_class:
// the flags allocation class should be last.
alloc_class chunk3( RFLAGS );
1057
// Singleton class for condition codes (RFLAGS is its only member).
reg_class int_flags( RFLAGS );
1060
// Class for pre evex float registers: the base slot of XMM0 .. XMM15.
reg_class float_reg_legacy(XMM0,  XMM1,  XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
                           XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15);
1078
// Class for evex float registers: the base slot of XMM0 .. XMM31.
reg_class float_reg_evex(XMM0,  XMM1,  XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
                         XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
                         XMM16, XMM17, XMM18, XMM19, XMM20, XMM21, XMM22, XMM23,
                         XMM24, XMM25, XMM26, XMM27, XMM28, XMM29, XMM30, XMM31);
1112
// Float classes chosen between the evex and legacy lists by a VM_Version
// predicate (presumably the evex list is used when the predicate holds).
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy,
                            %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy,
                               %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Class for pre evex double registers: the first two slots of XMM0 .. XMM15.
reg_class double_reg_legacy(XMM0,  XMM0b,   XMM1,  XMM1b,   XMM2,  XMM2b,   XMM3,  XMM3b,
                            XMM4,  XMM4b,   XMM5,  XMM5b,   XMM6,  XMM6b,   XMM7,  XMM7b,
                            XMM8,  XMM8b,   XMM9,  XMM9b,   XMM10, XMM10b,  XMM11, XMM11b,
                            XMM12, XMM12b,  XMM13, XMM13b,  XMM14, XMM14b,  XMM15, XMM15b);
1133
// Class for evex double registers: the first two slots of XMM0 .. XMM31.
reg_class double_reg_evex(XMM0,  XMM0b,   XMM1,  XMM1b,   XMM2,  XMM2b,   XMM3,  XMM3b,
                          XMM4,  XMM4b,   XMM5,  XMM5b,   XMM6,  XMM6b,   XMM7,  XMM7b,
                          XMM8,  XMM8b,   XMM9,  XMM9b,   XMM10, XMM10b,  XMM11, XMM11b,
                          XMM12, XMM12b,  XMM13, XMM13b,  XMM14, XMM14b,  XMM15, XMM15b,
                          XMM16, XMM16b,  XMM17, XMM17b,  XMM18, XMM18b,  XMM19, XMM19b,
                          XMM20, XMM20b,  XMM21, XMM21b,  XMM22, XMM22b,  XMM23, XMM23b,
                          XMM24, XMM24b,  XMM25, XMM25b,  XMM26, XMM26b,  XMM27, XMM27b,
                          XMM28, XMM28b,  XMM29, XMM29b,  XMM30, XMM30b,  XMM31, XMM31b);
1167
// Double classes chosen between the evex and legacy lists by a VM_Version
// predicate (presumably the evex list is used when the predicate holds).
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy,
                             %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy,
                                %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre evex 32bit vector registers: the base slot of XMM0 .. XMM15.
reg_class vectors_reg_legacy(XMM0,  XMM1,  XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
                             XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15);
1188
// Class for evex 32bit vector registers: the base slot of XMM0 .. XMM31.
reg_class vectors_reg_evex(XMM0,  XMM1,  XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
                           XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
                           XMM16, XMM17, XMM18, XMM19, XMM20, XMM21, XMM22, XMM23,
                           XMM24, XMM25, XMM26, XMM27, XMM28, XMM29, XMM30, XMM31);
1222
// 32bit vector classes chosen between the evex and legacy lists by a VM_Version
// predicate (presumably the evex list is used when the predicate holds).
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers: the first two slots of XMM0 .. XMM15.
reg_class vectord_reg_legacy(XMM0,  XMM0b,   XMM1,  XMM1b,   XMM2,  XMM2b,   XMM3,  XMM3b,
                             XMM4,  XMM4b,   XMM5,  XMM5b,   XMM6,  XMM6b,   XMM7,  XMM7b,
                             XMM8,  XMM8b,   XMM9,  XMM9b,   XMM10, XMM10b,  XMM11, XMM11b,
                             XMM12, XMM12b,  XMM13, XMM13b,  XMM14, XMM14b,  XMM15, XMM15b);
1243
// Class for evex 64bit vector registers: the first two slots of XMM0 .. XMM31.
reg_class vectord_reg_evex(XMM0,  XMM0b,   XMM1,  XMM1b,   XMM2,  XMM2b,   XMM3,  XMM3b,
                           XMM4,  XMM4b,   XMM5,  XMM5b,   XMM6,  XMM6b,   XMM7,  XMM7b,
                           XMM8,  XMM8b,   XMM9,  XMM9b,   XMM10, XMM10b,  XMM11, XMM11b,
                           XMM12, XMM12b,  XMM13, XMM13b,  XMM14, XMM14b,  XMM15, XMM15b,
                           XMM16, XMM16b,  XMM17, XMM17b,  XMM18, XMM18b,  XMM19, XMM19b,
                           XMM20, XMM20b,  XMM21, XMM21b,  XMM22, XMM22b,  XMM23, XMM23b,
                           XMM24, XMM24b,  XMM25, XMM25b,  XMM26, XMM26b,  XMM27, XMM27b,
                           XMM28, XMM28b,  XMM29, XMM29b,  XMM30, XMM30b,  XMM31, XMM31b);
1277
// 64bit vector classes chosen between the evex and legacy lists by a VM_Version
// predicate (presumably the evex list is used when the predicate holds).
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers: the first four slots of XMM0 .. XMM15.
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,    XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,    XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,    XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,    XMM7,  XMM7b,  XMM7c,  XMM7d,
                             XMM8,  XMM8b,  XMM8c,  XMM8d,    XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,   XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,   XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,   XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers: the first four slots of XMM0 .. XMM31.
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,    XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,    XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,    XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,    XMM7,  XMM7b,  XMM7c,  XMM7d,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,    XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,   XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,   XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,   XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,   XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,   XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,   XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,   XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,   XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,   XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,   XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,   XMM31, XMM31b, XMM31c, XMM31d);
1332
// 128bit vector classes chosen between the evex and legacy lists by a VM_Version
// predicate (presumably the evex list is used when the predicate holds).
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers: the first eight slots of XMM0 .. XMM15,
// one register per row.
reg_class vectory_reg_legacy(
    XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for 256bit vector registers covering the full XMM0-XMM31 range
// (the EVEX counterpart of vectory_reg_legacy).
// Every register contributes eight slots (XMMn, XMMnb .. XMMnh).
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
// Runtime-selected 256bit vector register classes. Both choose between
// vectory_reg_evex and vectory_reg_legacy; they differ only in the predicate:
//   vectory_reg        - VM_Version::supports_evex()
//   vectory_reg_vlbwdq - VM_Version::supports_avx512vlbwdq()
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for all 512bit vector registers (XMM0-XMM31).
// Every register contributes sixteen slots (XMMn, XMMnb .. XMMnp).
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512bit vector registers: only XMM0-XMM15 are listed.
// Every register contributes sixteen slots (XMMn, XMMnb .. XMMnp).
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                             XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
// Runtime-selected 512bit vector register classes. Both choose between
// vectorz_reg_evex and vectorz_reg_legacy; they differ only in the predicate:
//   vectorz_reg    - VM_Version::supports_evex()
//   vectorz_reg_vl - VM_Version::supports_evex() && VM_Version::supports_avx512vl()
reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
// Singleton class holding only XMM0 (its first four slots).
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
// Tells whether both bounds of a CastLL node's long type are either open
// (min_jlong / max_jlong) or representable as a signed 32-bit immediate.
// Defined in the following source block.
bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
// Register masks that are filled in at runtime by reg_mask_init().
// The name of each mask states which registers reg_mask_init() removes
// from its parent mask (e.g. _PTR_NO_RAX_RBX_REG_mask drops rax and rbx).
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// Pointer/long/int masks combined with STACK_OR_STACK_SLOTS_mask().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Read-only accessors for the three stack-or-register masks.
inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
// Shorthands for the Assembler operand kinds imm_operand / disp32_operand.
#define RELOC_IMM64 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Lets the emitter code below write "__ insn(...)" for "masm->insn(...)".
#define __ masm->
1506
// Storage for the runtime register masks; populated by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662 //
1663 // Compute padding required for nodes which need alignment
1664 //
1665
1666 // The address of the call instruction needs to be 4-byte aligned to
1667 // ensure that it does not span a cache line so that it can be patched.
1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1669 {
1670 current_offset += clear_avx_size(); // skip vzeroupper
1671 current_offset += 1; // skip call opcode byte
1672 return align_up(current_offset, alignment_required()) - current_offset;
1673 }
1674
1675 // The address of the call instruction needs to be 4-byte aligned to
1676 // ensure that it does not span a cache line so that it can be patched.
1677 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1678 {
1679 current_offset += clear_avx_size(); // skip vzeroupper
1680 current_offset += 11; // skip movq instruction + call opcode byte
1681 return align_up(current_offset, alignment_required()) - current_offset;
1682 }
1683
1684 // This could be in MacroAssembler but it's fairly C2 specific
1685 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1686 Label exit;
1687 __ jccb(Assembler::noParity, exit);
1688 __ pushf();
1689 //
1690 // comiss/ucomiss instructions set ZF,PF,CF flags and
1691 // zero OF,AF,SF for NaN values.
1692 // Fixup flags by zeroing ZF,PF so that compare of NaN
1693 // values returns 'less than' result (CF is set).
1694 // Leave the rest of flags unchanged.
1695 //
1696 // 7 6 5 4 3 2 1 0
1697 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1698 // 0 0 1 0 1 0 1 1 (0x2B)
1699 //
1700 __ andq(Address(rsp, 0), 0xffffff2b);
1701 __ popf();
1702 __ bind(exit);
1703 }
1704
1705 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1706 // If any floating point comparison instruction is used, unordered case always triggers jump
1707 // for below condition, CF=1 is true when at least one input is NaN
1708 Label done;
1709 __ movl(dst, -1);
1710 __ jcc(Assembler::below, done);
1711 __ setcc(Assembler::notEqual, dst);
1712 __ bind(done);
1713 }
1714
// Floating point precision selector for emit_fp_ucom(), movfp() and
// emit_fp_min_max().
enum FP_PREC {
  fp_prec_hlf,  // selects evucomish / movhlf / the Float16 NaN pattern
  fp_prec_flt,  // selects ucomiss / movflt / the Float NaN pattern
  fp_prec_dbl   // selects ucomisd / movdbl / the Double NaN pattern
};
1720
1721 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1722 XMMRegister p, XMMRegister q) {
1723 if (pt == fp_prec_hlf) {
1724 __ evucomish(p, q);
1725 } else if (pt == fp_prec_flt) {
1726 __ ucomiss(p, q);
1727 } else {
1728 __ ucomisd(p, q);
1729 }
1730 }
1731
1732 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1733 XMMRegister dst, XMMRegister src, Register scratch) {
1734 if (pt == fp_prec_hlf) {
1735 __ movhlf(dst, src, scratch);
1736 } else if (pt == fp_prec_flt) {
1737 __ movflt(dst, src);
1738 } else {
1739 __ movdbl(dst, src);
1740 }
1741 }
1742
1743 // Math.min() # Math.max()
1744 // -----------------------------
1745 // (v)ucomis[h/s/d] #
1746 // ja -> b # a
1747 // jp -> NaN # NaN
1748 // jb -> a # b
1749 // je -> a | b # a & b
1750 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1751 XMMRegister a, XMMRegister b, Register rt,
1752 bool min, enum FP_PREC pt) {
1753 Label nan, zero, below, above, done;
1754
1755 emit_fp_ucom(masm, pt, a, b);
1756
1757 if (dst->encoding() != (min ? b : a)->encoding()) {
1758 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1759 } else {
1760 __ jccb(Assembler::above, done);
1761 }
1762 __ jccb(Assembler::parity, nan); // PF=1
1763 __ jccb(Assembler::below, below); // CF=1
1764
1765 // equal
1766 // Using bitwise operations is a low cost way to compute the correct result
1767 // for zero and non-zero inputs in this scenario except for NaN, which is
1768 // handled separately. The mantissa and exponent are valid with either
1769 // bitwise operation. For zero inputs, the sign bit is chosen according to
1770 // whether a minimum or maximum value is required.
1771 if (min) {
1772 // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
1773 __ vpor(dst, a, b, Assembler::AVX_128bit);
1774 } else {
1775 // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
1776 __ vpand(dst, a, b, Assembler::AVX_128bit);
1777 }
1778 __ jmp(done);
1779
1780 __ bind(above);
1781 movfp(masm, pt, dst, min ? b : a, rt);
1782 __ jmp(done);
1783
1784 __ bind(nan);
1785 if (pt == fp_prec_hlf) {
1786 __ movl(rt, 0x00007e00); // Float16.NaN
1787 __ evmovw(dst, rt);
1788 } else if (pt == fp_prec_flt) {
1789 __ movl(rt, 0x7fc00000); // Float.NaN
1790 __ movdl(dst, rt);
1791 } else {
1792 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1793 __ movdq(dst, rt);
1794 }
1795 __ jmp(done);
1796
1797 __ bind(below);
1798 movfp(masm, pt, dst, min ? a : b, rt);
1799
1800 __ bind(done);
1801 }
1802
1803 //=============================================================================
// The constant base node produces no register: its output mask is empty.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;

// The constant table is addressed absolutely, so the base offset is 0.
int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

// No post-allocation expansion is used for this node; postalloc_expand()
// must therefore never be called.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// The node emits no instructions ...
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// ... and consequently has size 0.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
// Debug printing: a fixed marker line, since there is nothing to disassemble.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
1828
1829
1830 //=============================================================================
1831 #ifndef PRODUCT
1832 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1833 Compile* C = ra_->C;
1834
1835 int framesize = C->output()->frame_size_in_bytes();
1836 int bangsize = C->output()->bang_size_in_bytes();
1837 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1838 // Remove wordSize for return addr which is already pushed.
1839 framesize -= wordSize;
1840
1841 if (C->output()->need_stack_bang(bangsize)) {
1842 framesize -= wordSize;
1843 st->print("# stack bang (%d bytes)", bangsize);
1844 st->print("\n\t");
1845 st->print("pushq rbp\t# Save rbp");
1846 if (PreserveFramePointer) {
1847 st->print("\n\t");
1848 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1849 }
1850 if (framesize) {
1851 st->print("\n\t");
1852 st->print("subq rsp, #%d\t# Create frame",framesize);
1853 }
1854 } else {
1855 st->print("subq rsp, #%d\t# Create frame",framesize);
1856 st->print("\n\t");
1857 framesize -= wordSize;
1858 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1859 if (PreserveFramePointer) {
1860 st->print("\n\t");
1861 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1862 if (framesize > 0) {
1863 st->print("\n\t");
1864 st->print("addq rbp, #%d", framesize);
1865 }
1866 }
1867 }
1868
1869 if (VerifyStackAtCalls) {
1870 st->print("\n\t");
1871 framesize -= wordSize;
1872 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1873 #ifdef ASSERT
1874 st->print("\n\t");
1875 st->print("# stack alignment check");
1876 #endif
1877 }
1878 if (C->stub_function() != nullptr) {
1879 st->print("\n\t");
1880 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1881 st->print("\n\t");
1882 st->print("je fast_entry\t");
1883 st->print("\n\t");
1884 st->print("call #nmethod_entry_barrier_stub\t");
1885 st->print("\n\tfast_entry:");
1886 }
1887 st->cr();
1888 }
1889 #endif
1890
1891 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1892 Compile* C = ra_->C;
1893
1894 __ verified_entry(C);
1895
1896 if (ra_->C->stub_function() == nullptr) {
1897 __ entry_barrier();
1898 }
1899
1900 if (!Compile::current()->output()->in_scratch_emit_size()) {
1901 __ bind(*_verified_entry);
1902 }
1903
1904 C->output()->set_frame_complete(__ offset());
1905
1906 if (C->has_mach_constant_base_node()) {
1907 // NOTE: We set the table base offset here because users might be
1908 // emitted before MachConstantBaseNode.
1909 ConstantTable& constant_table = C->output()->constant_table();
1910 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1911 }
1912 }
1913
1914
// Relocation count reported for the prolog.
// NOTE(review): the value is 0 although the historic comment called it
// "a large enough number" (MachEpilogNode::reloc() returns 2 with the same
// comment). Presumably the prolog reserves no relocation entries - confirm.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1919
1920 //=============================================================================
1921 #ifndef PRODUCT
1922 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1923 {
1924 Compile* C = ra_->C;
1925 if (generate_vzeroupper(C)) {
1926 st->print("vzeroupper");
1927 st->cr(); st->print("\t");
1928 }
1929
1930 int framesize = C->output()->frame_size_in_bytes();
1931 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1932 // Remove word for return adr already pushed
1933 // and RBP
1934 framesize -= 2*wordSize;
1935
1936 if (framesize) {
1937 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1938 st->print("\t");
1939 }
1940
1941 st->print_cr("popq rbp");
1942 if (do_polling() && C->is_method_compilation()) {
1943 st->print("\t");
1944 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1945 "ja #safepoint_stub\t"
1946 "# Safepoint: poll for GC");
1947 }
1948 }
1949 #endif
1950
1951 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1952 {
1953 Compile* C = ra_->C;
1954
1955 if (generate_vzeroupper(C)) {
1956 // Clear upper bits of YMM registers when current compiled code uses
1957 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1958 __ vzeroupper();
1959 }
1960
1961 // Subtract two words to account for return address and rbp
1962 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1963 __ remove_frame(initial_framesize, C->needs_stack_repair());
1964
1965 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1966 __ reserved_stack_check();
1967 }
1968
1969 if (do_polling() && C->is_method_compilation()) {
1970 Label dummy_label;
1971 Label* code_stub = &dummy_label;
1972 if (!C->output()->in_scratch_emit_size()) {
1973 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1974 C->output()->add_stub(stub);
1975 code_stub = &stub->entry();
1976 }
1977 __ relocate(relocInfo::poll_return_type);
1978 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1979 }
1980 }
1981
1982 int MachEpilogNode::reloc() const
1983 {
1984 return 2; // a large enough number
1985 }
1986
1987 const Pipeline* MachEpilogNode::pipeline() const
1988 {
1989 return MachNode::pipeline_class();
1990 }
1991
1992 //=============================================================================
1993
// Register categories returned by rc_class().
enum RC {
  rc_bad,    // the OptoReg name is not valid
  rc_int,    // general purpose register (VMReg::is_Register())
  rc_kreg,   // mask register (VMReg::is_KRegister())
  rc_float,  // XMM register (VMReg::is_XMMRegister())
  rc_stack   // stack slot
};
2001
2002 static enum RC rc_class(OptoReg::Name reg)
2003 {
2004 if( !OptoReg::is_valid(reg) ) return rc_bad;
2005
2006 if (OptoReg::is_stack(reg)) return rc_stack;
2007
2008 VMReg r = OptoReg::as_VMReg(reg);
2009
2010 if (r->is_Register()) return rc_int;
2011
2012 if (r->is_KRegister()) return rc_kreg;
2013
2014 assert(r->is_XMMRegister(), "must be");
2015 return rc_float;
2016 }
2017
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.

// Vector register-to-register copy; called by
// MachSpillCopyNode::implementation() for xmm -> xmm moves.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector spill between an xmm register and a stack slot; is_load selects
// the stack -> register direction.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);
2024
// Copies a vector value between two stack slots (rsp-relative offsets
// src_offset and dst_offset). ireg is the ideal register kind (Op_VecS,
// Op_VecD, Op_VecX, Op_VecY or Op_VecZ) and determines the copy width.
// With a non-null masm the instructions are emitted; otherwise, in
// non-PRODUCT builds, the same sequence is printed to st.
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS:
      // 32-bit copy through rax; rax is preserved in the slot below rsp.
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      // 64-bit copy with a push/pop pair, no scratch register needed.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      // 128-bit copy as two 64-bit push/pop pairs.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      // 256-bit copy through xmm0; xmm0 is preserved at [rsp - 32].
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      // 512-bit copy through xmm0; xmm0 is preserved at [rsp - 64].
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Format-only path: print the sequence that the branch above emits.
    switch (ireg) {
    case Op_VecS:
      st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl rax, [rsp + #%d]\n\t"
                "movl [rsp + #%d], rax\n\t"
                "movq rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecX:
      st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushq [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): the text says vmovdqu while the emitting branch uses
      // evmovdquq for Op_VecZ - consider aligning the mnemonic.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
2102
2103 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2104 PhaseRegAlloc* ra_,
2105 bool do_size,
2106 outputStream* st) const {
2107 assert(masm != nullptr || st != nullptr, "sanity");
2108 // Get registers to move
2109 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2110 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2111 OptoReg::Name dst_second = ra_->get_reg_second(this);
2112 OptoReg::Name dst_first = ra_->get_reg_first(this);
2113
2114 enum RC src_second_rc = rc_class(src_second);
2115 enum RC src_first_rc = rc_class(src_first);
2116 enum RC dst_second_rc = rc_class(dst_second);
2117 enum RC dst_first_rc = rc_class(dst_first);
2118
2119 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2120 "must move at least 1 register" );
2121
2122 if (src_first == dst_first && src_second == dst_second) {
2123 // Self copy, no move
2124 return 0;
2125 }
2126 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
2127 uint ireg = ideal_reg();
2128 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2129 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
2130 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
2131 // mem -> mem
2132 int src_offset = ra_->reg2offset(src_first);
2133 int dst_offset = ra_->reg2offset(dst_first);
2134 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2135 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2136 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2137 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2138 int stack_offset = ra_->reg2offset(dst_first);
2139 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2140 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2141 int stack_offset = ra_->reg2offset(src_first);
2142 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2143 } else {
2144 ShouldNotReachHere();
2145 }
2146 return 0;
2147 }
2148 if (src_first_rc == rc_stack) {
2149 // mem ->
2150 if (dst_first_rc == rc_stack) {
2151 // mem -> mem
2152 assert(src_second != dst_first, "overlap");
2153 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2154 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2155 // 64-bit
2156 int src_offset = ra_->reg2offset(src_first);
2157 int dst_offset = ra_->reg2offset(dst_first);
2158 if (masm) {
2159 __ pushq(Address(rsp, src_offset));
2160 __ popq (Address(rsp, dst_offset));
2161 #ifndef PRODUCT
2162 } else {
2163 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2164 "popq [rsp + #%d]",
2165 src_offset, dst_offset);
2166 #endif
2167 }
2168 } else {
2169 // 32-bit
2170 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2171 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2172 // No pushl/popl, so:
2173 int src_offset = ra_->reg2offset(src_first);
2174 int dst_offset = ra_->reg2offset(dst_first);
2175 if (masm) {
2176 __ movq(Address(rsp, -8), rax);
2177 __ movl(rax, Address(rsp, src_offset));
2178 __ movl(Address(rsp, dst_offset), rax);
2179 __ movq(rax, Address(rsp, -8));
2180 #ifndef PRODUCT
2181 } else {
2182 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2183 "movl rax, [rsp + #%d]\n\t"
2184 "movl [rsp + #%d], rax\n\t"
2185 "movq rax, [rsp - #8]",
2186 src_offset, dst_offset);
2187 #endif
2188 }
2189 }
2190 return 0;
2191 } else if (dst_first_rc == rc_int) {
2192 // mem -> gpr
2193 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2194 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2195 // 64-bit
2196 int offset = ra_->reg2offset(src_first);
2197 if (masm) {
2198 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2199 #ifndef PRODUCT
2200 } else {
2201 st->print("movq %s, [rsp + #%d]\t# spill",
2202 Matcher::regName[dst_first],
2203 offset);
2204 #endif
2205 }
2206 } else {
2207 // 32-bit
2208 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2209 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2210 int offset = ra_->reg2offset(src_first);
2211 if (masm) {
2212 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2213 #ifndef PRODUCT
2214 } else {
2215 st->print("movl %s, [rsp + #%d]\t# spill",
2216 Matcher::regName[dst_first],
2217 offset);
2218 #endif
2219 }
2220 }
2221 return 0;
2222 } else if (dst_first_rc == rc_float) {
2223 // mem-> xmm
2224 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2225 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2226 // 64-bit
2227 int offset = ra_->reg2offset(src_first);
2228 if (masm) {
2229 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2230 #ifndef PRODUCT
2231 } else {
2232 st->print("%s %s, [rsp + #%d]\t# spill",
2233 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2234 Matcher::regName[dst_first],
2235 offset);
2236 #endif
2237 }
2238 } else {
2239 // 32-bit
2240 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2241 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2242 int offset = ra_->reg2offset(src_first);
2243 if (masm) {
2244 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2245 #ifndef PRODUCT
2246 } else {
2247 st->print("movss %s, [rsp + #%d]\t# spill",
2248 Matcher::regName[dst_first],
2249 offset);
2250 #endif
2251 }
2252 }
2253 return 0;
2254 } else if (dst_first_rc == rc_kreg) {
2255 // mem -> kreg
2256 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2257 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2258 // 64-bit
2259 int offset = ra_->reg2offset(src_first);
2260 if (masm) {
2261 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2262 #ifndef PRODUCT
2263 } else {
2264 st->print("kmovq %s, [rsp + #%d]\t# spill",
2265 Matcher::regName[dst_first],
2266 offset);
2267 #endif
2268 }
2269 }
2270 return 0;
2271 }
2272 } else if (src_first_rc == rc_int) {
2273 // gpr ->
2274 if (dst_first_rc == rc_stack) {
2275 // gpr -> mem
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(dst_first);
2280 if (masm) {
2281 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("movq [rsp + #%d], %s\t# spill",
2285 offset,
2286 Matcher::regName[src_first]);
2287 #endif
2288 }
2289 } else {
2290 // 32-bit
2291 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2292 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2293 int offset = ra_->reg2offset(dst_first);
2294 if (masm) {
2295 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2296 #ifndef PRODUCT
2297 } else {
2298 st->print("movl [rsp + #%d], %s\t# spill",
2299 offset,
2300 Matcher::regName[src_first]);
2301 #endif
2302 }
2303 }
2304 return 0;
2305 } else if (dst_first_rc == rc_int) {
2306 // gpr -> gpr
2307 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2308 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2309 // 64-bit
2310 if (masm) {
2311 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2312 as_Register(Matcher::_regEncode[src_first]));
2313 #ifndef PRODUCT
2314 } else {
2315 st->print("movq %s, %s\t# spill",
2316 Matcher::regName[dst_first],
2317 Matcher::regName[src_first]);
2318 #endif
2319 }
2320 return 0;
2321 } else {
2322 // 32-bit
2323 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2324 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2325 if (masm) {
2326 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2327 as_Register(Matcher::_regEncode[src_first]));
2328 #ifndef PRODUCT
2329 } else {
2330 st->print("movl %s, %s\t# spill",
2331 Matcher::regName[dst_first],
2332 Matcher::regName[src_first]);
2333 #endif
2334 }
2335 return 0;
2336 }
2337 } else if (dst_first_rc == rc_float) {
2338 // gpr -> xmm
2339 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2340 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2341 // 64-bit
2342 if (masm) {
2343 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2344 #ifndef PRODUCT
2345 } else {
2346 st->print("movdq %s, %s\t# spill",
2347 Matcher::regName[dst_first],
2348 Matcher::regName[src_first]);
2349 #endif
2350 }
2351 } else {
2352 // 32-bit
2353 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2354 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2355 if (masm) {
2356 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2357 #ifndef PRODUCT
2358 } else {
2359 st->print("movdl %s, %s\t# spill",
2360 Matcher::regName[dst_first],
2361 Matcher::regName[src_first]);
2362 #endif
2363 }
2364 }
2365 return 0;
2366 } else if (dst_first_rc == rc_kreg) {
2367 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2368 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2369 // 64-bit
2370 if (masm) {
2371 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2372 #ifndef PRODUCT
2373 } else {
2374 st->print("kmovq %s, %s\t# spill",
2375 Matcher::regName[dst_first],
2376 Matcher::regName[src_first]);
2377 #endif
2378 }
2379 }
2380 Unimplemented();
2381 return 0;
2382 }
2383 } else if (src_first_rc == rc_float) {
2384 // xmm ->
2385 if (dst_first_rc == rc_stack) {
2386 // xmm -> mem
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 int offset = ra_->reg2offset(dst_first);
2391 if (masm) {
2392 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2393 #ifndef PRODUCT
2394 } else {
2395 st->print("movsd [rsp + #%d], %s\t# spill",
2396 offset,
2397 Matcher::regName[src_first]);
2398 #endif
2399 }
2400 } else {
2401 // 32-bit
2402 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2403 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2404 int offset = ra_->reg2offset(dst_first);
2405 if (masm) {
2406 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2407 #ifndef PRODUCT
2408 } else {
2409 st->print("movss [rsp + #%d], %s\t# spill",
2410 offset,
2411 Matcher::regName[src_first]);
2412 #endif
2413 }
2414 }
2415 return 0;
2416 } else if (dst_first_rc == rc_int) {
2417 // xmm -> gpr
2418 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2419 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2420 // 64-bit
2421 if (masm) {
2422 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2423 #ifndef PRODUCT
2424 } else {
2425 st->print("movdq %s, %s\t# spill",
2426 Matcher::regName[dst_first],
2427 Matcher::regName[src_first]);
2428 #endif
2429 }
2430 } else {
2431 // 32-bit
2432 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2433 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2434 if (masm) {
2435 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2436 #ifndef PRODUCT
2437 } else {
2438 st->print("movdl %s, %s\t# spill",
2439 Matcher::regName[dst_first],
2440 Matcher::regName[src_first]);
2441 #endif
2442 }
2443 }
2444 return 0;
2445 } else if (dst_first_rc == rc_float) {
2446 // xmm -> xmm
2447 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2448 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2449 // 64-bit
2450 if (masm) {
2451 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2452 #ifndef PRODUCT
2453 } else {
2454 st->print("%s %s, %s\t# spill",
2455 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2456 Matcher::regName[dst_first],
2457 Matcher::regName[src_first]);
2458 #endif
2459 }
2460 } else {
2461 // 32-bit
2462 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2463 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2464 if (masm) {
2465 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2466 #ifndef PRODUCT
2467 } else {
2468 st->print("%s %s, %s\t# spill",
2469 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2470 Matcher::regName[dst_first],
2471 Matcher::regName[src_first]);
2472 #endif
2473 }
2474 }
2475 return 0;
2476 } else if (dst_first_rc == rc_kreg) {
2477 assert(false, "Illegal spilling");
2478 return 0;
2479 }
2480 } else if (src_first_rc == rc_kreg) {
2481 if (dst_first_rc == rc_stack) {
2482 // mem -> kreg
2483 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2484 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2485 // 64-bit
2486 int offset = ra_->reg2offset(dst_first);
2487 if (masm) {
2488 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2489 #ifndef PRODUCT
2490 } else {
2491 st->print("kmovq [rsp + #%d] , %s\t# spill",
2492 offset,
2493 Matcher::regName[src_first]);
2494 #endif
2495 }
2496 }
2497 return 0;
2498 } else if (dst_first_rc == rc_int) {
2499 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2500 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2501 // 64-bit
2502 if (masm) {
2503 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2504 #ifndef PRODUCT
2505 } else {
2506 st->print("kmovq %s, %s\t# spill",
2507 Matcher::regName[dst_first],
2508 Matcher::regName[src_first]);
2509 #endif
2510 }
2511 }
2512 Unimplemented();
2513 return 0;
2514 } else if (dst_first_rc == rc_kreg) {
2515 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2516 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2517 // 64-bit
2518 if (masm) {
2519 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2520 #ifndef PRODUCT
2521 } else {
2522 st->print("kmovq %s, %s\t# spill",
2523 Matcher::regName[dst_first],
2524 Matcher::regName[src_first]);
2525 #endif
2526 }
2527 }
2528 return 0;
2529 } else if (dst_first_rc == rc_float) {
2530 assert(false, "Illegal spill");
2531 return 0;
2532 }
2533 }
2534
2535 assert(0," foo ");
2536 Unimplemented();
2537 return 0;
2538 }
2539
2540 #ifndef PRODUCT
2541 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2542 implementation(nullptr, ra_, false, st);
2543 }
2544 #endif
2545
2546 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2547 implementation(masm, ra_, false, nullptr);
2548 }
2549
2550 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2551 return MachNode::size(ra_);
2552 }
2553
2554 //=============================================================================
2555 #ifndef PRODUCT
2556 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2557 {
2558 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2559 int reg = ra_->get_reg_first(this);
2560 st->print("leaq %s, [rsp + #%d]\t# box lock",
2561 Matcher::regName[reg], offset);
2562 }
2563 #endif
2564
2565 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2566 {
2567 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2568 int reg = ra_->get_encode(this);
2569
2570 __ lea(as_Register(reg), Address(rsp, offset));
2571 }
2572
2573 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2574 {
2575 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2576 if (ra_->get_encode(this) > 15) {
2577 return (offset < 0x80) ? 6 : 9; // REX2
2578 } else {
2579 return (offset < 0x80) ? 5 : 8; // REX
2580 }
2581 }
2582
2583 //=============================================================================
2584 #ifndef PRODUCT
2585 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2586 {
2587 st->print_cr("MachVEPNode");
2588 }
2589 #endif
2590
2591 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2592 {
2593 CodeBuffer* cbuf = masm->code();
2594 if (!_verified) {
2595 __ ic_check(1);
2596 } else {
2597 if (ra_->C->stub_function() == nullptr) {
2598 // Emit the entry barrier in a temporary frame before unpacking because
2599 // it can deopt, which would require packing the scalarized args again.
2600 __ verified_entry(ra_->C, 0);
2601 __ entry_barrier();
2602 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2603 __ remove_frame(initial_framesize, false);
2604 }
2605 // Unpack inline type args passed as oop and then jump to
2606 // the verified entry point (skipping the unverified entry).
2607 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2608 // Emit code for verified entry and save increment for stack repair on return
2609 __ verified_entry(ra_->C, sp_inc);
2610 if (Compile::current()->output()->in_scratch_emit_size()) {
2611 Label dummy_verified_entry;
2612 __ jmp(dummy_verified_entry);
2613 } else {
2614 __ jmp(*_verified_entry);
2615 }
2616 }
2617 if (ra_->C->stub_function() == nullptr) {
2618 // Pad so that the next call to MachVEPNode::emit() starts out with the
2619 // correct alignment. This is needed by entry_barrier() to align the
2620 // compare. But unfortunately we need to align all 4 MachVEPNodes because
2621 // entry point offsets are computed using scratch_emit_size(), so starting
2622 // alignment must match the alignment of the scratch buffer, otherwise the sizes
2623 // will be off.
2624 __ align(4);
2625 }
2626 }
2627
2628 //=============================================================================
2629 #ifndef PRODUCT
2630 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2631 {
2632 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2633 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2634 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2635 }
2636 #endif
2637
2638 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2639 {
2640 __ ic_check(InteriorEntryAlignment);
2641 }
2642
2643
2644 //=============================================================================
2645
2646 bool Matcher::supports_vector_calling_convention(void) {
2647 return EnableVectorSupport;
2648 }
2649
2650 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2651 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2652 }
2653
2654 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2655 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2656 }
2657
#ifdef ASSERT
// Debug-only: true when 'mdef' is flagged demotable through either its first
// or its second operand.
static bool is_ndd_demotable(const MachNode* mdef) {
  if (is_ndd_demotable_opr1(mdef)) {
    return true;
  }
  return is_ndd_demotable_opr2(mdef);
}
#endif
2663
2664 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2665 int oper_index) {
2666 if (mdef == nullptr) {
2667 return false;
2668 }
2669
2670 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2671 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2672 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2673 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2674 return false;
2675 }
2676
2677 // Complex memory operand covers multiple incoming edges needed for
2678 // address computation. Biasing def towards any address component will not
2679 // result in NDD demotion by assembler.
2680 if (mdef->operand_num_edges(oper_index) != 1) {
2681 return false;
2682 }
2683
2684 // Demotion candidate must be register mask compatible with definition.
2685 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2686 if (!oper_mask.overlap(mdef->out_RegMask())) {
2687 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2688 return false;
2689 }
2690
2691 switch (oper_index) {
2692 // First operand of MachNode corresponding to Intel APX NDD selection
2693 // pattern can share its assigned register with definition operand if
2694 // their live ranges do not overlap. In such a scenario we can demote
2695 // it to legacy map0/map1 instruction by replacing its 4-byte extended
2696 // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
2697 // are decorated with a special flag by instruction selector.
2698 case 1:
2699 return is_ndd_demotable_opr1(mdef);
2700
2701 // Definition operand of commutative operation can be biased towards second
2702 // operand.
2703 case 2:
2704 return is_ndd_demotable_opr2(mdef);
2705
2706 // Current scheme only selects up to two biasing candidates
2707 default:
2708 assert(false, "unhandled operand index: %s", mdef->Name());
2709 break;
2710 }
2711
2712 return false;
2713 }
2714
2715 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2716 assert(EnableVectorSupport, "sanity");
2717 int lo = XMM0_num;
2718 int hi = XMM0b_num;
2719 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2720 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2721 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2722 return OptoRegPair(hi, lo);
2723 }
2724
2725 // Is this branch offset short enough that a short branch can be used?
2726 //
2727 // NOTE: If the platform does not provide any short branch variants, then
2728 // this method should return false for offset 0.
2729 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2730 // The passed offset is relative to address of the branch.
2731 // On 86 a branch displacement is calculated relative to address
2732 // of a next instruction.
2733 offset -= br_size;
2734
2735 // the short version of jmpConUCF2 contains multiple branches,
2736 // making the reach slightly less
2737 if (rule == jmpConUCF2_rule)
2738 return (-126 <= offset && offset <= 125);
2739 return (-128 <= offset && offset <= 127);
2740 }
2741
#ifdef ASSERT
// Return whether or not this register is ever used as an argument.
// Covers both halves of the listed general registers and the first two slots
// of XMM0 through XMM7.
bool Matcher::can_be_java_arg(int reg)
{
  switch (reg) {
    case RDI_num:  case RDI_H_num:
    case RSI_num:  case RSI_H_num:
    case RDX_num:  case RDX_H_num:
    case RCX_num:  case RCX_H_num:
    case R8_num:   case R8_H_num:
    case R9_num:   case R9_H_num:
    case R12_num:  case R12_H_num:
    case XMM0_num: case XMM0b_num:
    case XMM1_num: case XMM1b_num:
    case XMM2_num: case XMM2b_num:
    case XMM3_num: case XMM3b_num:
    case XMM4_num: case XMM4b_num:
    case XMM5_num: case XMM5b_num:
    case XMM6_num: case XMM6b_num:
    case XMM7_num: case XMM7b_num:
      return true;
    default:
      return false;
  }
}
#endif
2764
2765 uint Matcher::int_pressure_limit()
2766 {
2767 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2768 }
2769
2770 uint Matcher::float_pressure_limit()
2771 {
2772 // After experiment around with different values, the following default threshold
2773 // works best for LCM's register pressure scheduling on x64.
2774 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2775 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2776 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2777 }
2778
2779 // Register for the first projection of an int pair
2780 const RegMask& Matcher::firstI_proj_mask() {
2781 return INT_RAX_REG_mask();
2782 }
2783
2784 // Register for the second projection of an int pair
2785 const RegMask& Matcher::secondI_proj_mask() {
2786 return INT_RDX_REG_mask();
2787 }
2788
2789 // Register for the first projection of a long pair
2790 const RegMask& Matcher::firstL_proj_mask() {
2791 return LONG_RAX_REG_mask();
2792 }
2793
2794 // Register for the second projection of a long pair
2795 const RegMask& Matcher::secondL_proj_mask() {
2796 return LONG_RDX_REG_mask();
2797 }
2798
2799 %}
2800
2801 source_hpp %{
2802 // Header information of the source block.
2803 // Method declarations/definitions which are used outside
2804 // the ad-scope can conveniently be defined here.
2805 //
2806 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
2808
2809 #include "runtime/vm_version.hpp"
2810
2811 class NativeJump;
2812
2813 class CallStubImpl {
2814
2815 //--------------------------------------------------------------
2816 //---< Used for optimization in Compile::shorten_branches >---
2817 //--------------------------------------------------------------
2818
2819 public:
2820 // Size of call trampoline stub.
2821 static uint size_call_trampoline() {
2822 return 0; // no call trampolines on this platform
2823 }
2824
2825 // number of relocations needed by a call trampoline stub
2826 static uint reloc_call_trampoline() {
2827 return 0; // no call trampolines on this platform
2828 }
2829 };
2830
2831 class HandlerImpl {
2832
2833 public:
2834
2835 static int emit_deopt_handler(C2_MacroAssembler* masm);
2836
2837 static uint size_deopt_handler() {
2838 // one call and one jmp.
2839 return 7;
2840 }
2841 };
2842
2843 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2844 switch(bytes) {
2845 case 4: // fall-through
2846 case 8: // fall-through
2847 case 16: return Assembler::AVX_128bit;
2848 case 32: return Assembler::AVX_256bit;
2849 case 64: return Assembler::AVX_512bit;
2850
2851 default: {
2852 ShouldNotReachHere();
2853 return Assembler::AVX_NoVec;
2854 }
2855 }
2856 }
2857
2858 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2859 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2860 }
2861
2862 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2863 uint def_idx = use->operand_index(opnd);
2864 Node* def = use->in(def_idx);
2865 return vector_length_encoding(def);
2866 }
2867
2868 static inline bool is_vector_popcount_predicate(BasicType bt) {
2869 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2870 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2871 }
2872
2873 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2874 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2875 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2876 }
2877
2878 class Node::PD {
2879 public:
2880 enum NodeFlags : uint64_t {
2881 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2882 Flag_sets_carry_flag = Node::_last_flag << 2,
2883 Flag_sets_parity_flag = Node::_last_flag << 3,
2884 Flag_sets_zero_flag = Node::_last_flag << 4,
2885 Flag_sets_overflow_flag = Node::_last_flag << 5,
2886 Flag_sets_sign_flag = Node::_last_flag << 6,
2887 Flag_clears_carry_flag = Node::_last_flag << 7,
2888 Flag_clears_parity_flag = Node::_last_flag << 8,
2889 Flag_clears_zero_flag = Node::_last_flag << 9,
2890 Flag_clears_overflow_flag = Node::_last_flag << 10,
2891 Flag_clears_sign_flag = Node::_last_flag << 11,
2892 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2893 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2894 _last_flag = Flag_ndd_demotable_opr2
2895 };
2896 };
2897
2898 %} // end source_hpp
2899
2900 source %{
2901
2902 #include "opto/addnode.hpp"
2903 #include "c2_intelJccErratum_x86.hpp"
2904
2905 void PhaseOutput::pd_perform_mach_node_analysis() {
2906 if (VM_Version::has_intel_jcc_erratum()) {
2907 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2908 _buf_sizes._code += extra_padding;
2909 }
2910 }
2911
2912 int MachNode::pd_alignment_required() const {
2913 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2914 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2915 return IntelJccErratum::largest_jcc_size() + 1;
2916 } else {
2917 return 1;
2918 }
2919 }
2920
2921 int MachNode::compute_padding(int current_offset) const {
2922 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2923 Compile* C = Compile::current();
2924 PhaseOutput* output = C->output();
2925 Block* block = output->block();
2926 int index = output->index();
2927 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2928 } else {
2929 return 0;
2930 }
2931 }
2932
2933 // Emit deopt handler code.
2934 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2935
2936 // Note that the code buffer's insts_mark is always relative to insts.
2937 // That's why we must use the macroassembler to generate a handler.
2938 address base = __ start_a_stub(size_deopt_handler());
2939 if (base == nullptr) {
2940 ciEnv::current()->record_failure("CodeCache is full");
2941 return 0; // CodeBuffer::expand failed
2942 }
2943 int offset = __ offset();
2944
2945 Label start;
2946 __ bind(start);
2947
2948 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2949
2950 int entry_offset = __ offset();
2951
2952 __ jmp(start);
2953
2954 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2955 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2956 "out of bounds read in post-call NOP check");
2957 __ end_a_stub();
2958 return entry_offset;
2959 }
2960
2961 static Assembler::Width widthForType(BasicType bt) {
2962 if (bt == T_BYTE) {
2963 return Assembler::B;
2964 } else if (bt == T_SHORT) {
2965 return Assembler::W;
2966 } else if (bt == T_INT) {
2967 return Assembler::D;
2968 } else {
2969 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2970 return Assembler::Q;
2971 }
2972 }
2973
2974 //=============================================================================
2975
2976 // Float masks come from different places depending on platform.
2977 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2978 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2979 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2980 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2981 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2982 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2983 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2984 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2985 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2986 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2987 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2988 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2989 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2990 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2991 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2992 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2993 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2994 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2995 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2996
2997 //=============================================================================
2998 bool Matcher::match_rule_supported(int opcode) {
2999 if (!has_match_rule(opcode)) {
3000 return false; // no match rule present
3001 }
3002 switch (opcode) {
3003 case Op_AbsVL:
3004 case Op_StoreVectorScatter:
3005 if (UseAVX < 3) {
3006 return false;
3007 }
3008 break;
3009 case Op_PopCountI:
3010 case Op_PopCountL:
3011 if (!UsePopCountInstruction) {
3012 return false;
3013 }
3014 break;
3015 case Op_PopCountVI:
3016 if (UseAVX < 2) {
3017 return false;
3018 }
3019 break;
3020 case Op_CompressV:
3021 case Op_ExpandV:
3022 case Op_PopCountVL:
3023 if (UseAVX < 2) {
3024 return false;
3025 }
3026 break;
3027 case Op_MulVI:
3028 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3029 return false;
3030 }
3031 break;
3032 case Op_MulVL:
3033 if (UseSSE < 4) { // only with SSE4_1 or AVX
3034 return false;
3035 }
3036 break;
3037 case Op_MulReductionVL:
3038 if (VM_Version::supports_avx512dq() == false) {
3039 return false;
3040 }
3041 break;
3042 case Op_AbsVB:
3043 case Op_AbsVS:
3044 case Op_AbsVI:
3045 case Op_AddReductionVI:
3046 case Op_AndReductionV:
3047 case Op_OrReductionV:
3048 case Op_XorReductionV:
3049 if (UseSSE < 3) { // requires at least SSSE3
3050 return false;
3051 }
3052 break;
3053 case Op_MaxHF:
3054 case Op_MinHF:
3055 if (!VM_Version::supports_avx512vlbw()) {
3056 return false;
3057 } // fallthrough
3058 case Op_AddHF:
3059 case Op_DivHF:
3060 case Op_FmaHF:
3061 case Op_MulHF:
3062 case Op_ReinterpretS2HF:
3063 case Op_ReinterpretHF2S:
3064 case Op_SubHF:
3065 case Op_SqrtHF:
3066 if (!VM_Version::supports_avx512_fp16()) {
3067 return false;
3068 }
3069 break;
3070 case Op_VectorLoadShuffle:
3071 case Op_VectorRearrange:
3072 case Op_MulReductionVI:
3073 if (UseSSE < 4) { // requires at least SSE4
3074 return false;
3075 }
3076 break;
3077 case Op_IsInfiniteF:
3078 case Op_IsInfiniteD:
3079 if (!VM_Version::supports_avx512dq()) {
3080 return false;
3081 }
3082 break;
3083 case Op_SqrtVD:
3084 case Op_SqrtVF:
3085 case Op_VectorMaskCmp:
3086 case Op_VectorCastB2X:
3087 case Op_VectorCastS2X:
3088 case Op_VectorCastI2X:
3089 case Op_VectorCastL2X:
3090 case Op_VectorCastF2X:
3091 case Op_VectorCastD2X:
3092 case Op_VectorUCastB2X:
3093 case Op_VectorUCastS2X:
3094 case Op_VectorUCastI2X:
3095 case Op_VectorMaskCast:
3096 if (UseAVX < 1) { // enabled for AVX only
3097 return false;
3098 }
3099 break;
3100 case Op_PopulateIndex:
3101 if (UseAVX < 2) {
3102 return false;
3103 }
3104 break;
3105 case Op_RoundVF:
3106 if (UseAVX < 2) { // enabled for AVX2 only
3107 return false;
3108 }
3109 break;
3110 case Op_RoundVD:
3111 if (UseAVX < 3) {
3112 return false; // enabled for AVX3 only
3113 }
3114 break;
3115 case Op_CompareAndSwapL:
3116 case Op_CompareAndSwapP:
3117 break;
3118 case Op_StrIndexOf:
3119 if (!UseSSE42Intrinsics) {
3120 return false;
3121 }
3122 break;
3123 case Op_StrIndexOfChar:
3124 if (!UseSSE42Intrinsics) {
3125 return false;
3126 }
3127 break;
3128 case Op_OnSpinWait:
3129 if (VM_Version::supports_on_spin_wait() == false) {
3130 return false;
3131 }
3132 break;
3133 case Op_MulVB:
3134 case Op_LShiftVB:
3135 case Op_RShiftVB:
3136 case Op_URShiftVB:
3137 case Op_VectorInsert:
3138 case Op_VectorLoadMask:
3139 case Op_VectorStoreMask:
3140 case Op_VectorBlend:
3141 if (UseSSE < 4) {
3142 return false;
3143 }
3144 break;
3145 case Op_MaxD:
3146 case Op_MaxF:
3147 case Op_MinD:
3148 case Op_MinF:
3149 if (UseAVX < 1) { // enabled for AVX only
3150 return false;
3151 }
3152 break;
3153 case Op_CacheWB:
3154 case Op_CacheWBPreSync:
3155 case Op_CacheWBPostSync:
3156 if (!VM_Version::supports_data_cache_line_flush()) {
3157 return false;
3158 }
3159 break;
3160 case Op_ExtractB:
3161 case Op_ExtractL:
3162 case Op_ExtractI:
3163 case Op_RoundDoubleMode:
3164 if (UseSSE < 4) {
3165 return false;
3166 }
3167 break;
3168 case Op_RoundDoubleModeV:
3169 if (VM_Version::supports_avx() == false) {
3170 return false; // 128bit vroundpd is not available
3171 }
3172 break;
3173 case Op_LoadVectorGather:
3174 case Op_LoadVectorGatherMasked:
3175 if (UseAVX < 2) {
3176 return false;
3177 }
3178 break;
3179 case Op_FmaF:
3180 case Op_FmaD:
3181 case Op_FmaVD:
3182 case Op_FmaVF:
3183 if (!UseFMA) {
3184 return false;
3185 }
3186 break;
3187 case Op_MacroLogicV:
3188 if (UseAVX < 3 || !UseVectorMacroLogic) {
3189 return false;
3190 }
3191 break;
3192
3193 case Op_VectorCmpMasked:
3194 if (!UseCountTrailingZerosInstruction) {
3195 return false;
3196 }
3197 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3198 return false;
3199 }
3200 break;
3201 case Op_VectorMaskGen:
3202 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3203 return false;
3204 }
3205 break;
3206 case Op_VectorMaskFirstTrue:
3207 case Op_VectorMaskLastTrue:
3208 case Op_VectorMaskTrueCount:
3209 case Op_VectorMaskToLong:
3210 if (UseAVX < 1) {
3211 return false;
3212 }
3213 break;
3214 case Op_RoundF:
3215 case Op_RoundD:
3216 break;
3217 case Op_CopySignD:
3218 case Op_CopySignF:
3219 if (UseAVX < 3) {
3220 return false;
3221 }
3222 if (!VM_Version::supports_avx512vl()) {
3223 return false;
3224 }
3225 break;
3226 case Op_CompressBits:
3227 case Op_ExpandBits:
3228 if (!VM_Version::supports_bmi2()) {
3229 return false;
3230 }
3231 break;
3232 case Op_CompressM:
3233 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3234 return false;
3235 }
3236 break;
3237 case Op_ConvF2HF:
3238 case Op_ConvHF2F:
3239 if (!VM_Version::supports_float16()) {
3240 return false;
3241 }
3242 break;
3243 case Op_VectorCastF2HF:
3244 case Op_VectorCastHF2F:
3245 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3246 return false;
3247 }
3248 break;
3249 }
3250 return true; // Match rules are supported by default.
3251 }
3252
3253 //------------------------------------------------------------------------
3254
3255 static inline bool is_pop_count_instr_target(BasicType bt) {
3256 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3257 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3258 }
3259
3260 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3261 return match_rule_supported_vector(opcode, vlen, bt);
3262 }
3263
3264 // Identify extra cases that we might want to provide match rules for vector nodes and
3265 // other intrinsics guarded with vector length (vlen) and element type (bt).
3266 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3267 if (!match_rule_supported(opcode)) {
3268 return false;
3269 }
3270 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3271 // * SSE2 supports 128bit vectors for all types;
3272 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3273 // * AVX2 supports 256bit vectors for all types;
3274 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3275 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3276 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3277 // And MaxVectorSize is taken into account as well.
3278 if (!vector_size_supported(bt, vlen)) {
3279 return false;
3280 }
3281 // Special cases which require vector length follow:
3282 // * implementation limitations
3283 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3284 // * 128bit vroundpd instruction is present only in AVX1
3285 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3286 switch (opcode) {
3287 case Op_MaxVHF:
3288 case Op_MinVHF:
3289 if (!VM_Version::supports_avx512bw()) {
3290 return false;
3291 }
3292 case Op_AddVHF:
3293 case Op_DivVHF:
3294 case Op_FmaVHF:
3295 case Op_MulVHF:
3296 case Op_SubVHF:
3297 case Op_SqrtVHF:
3298 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3299 return false;
3300 }
3301 if (!VM_Version::supports_avx512_fp16()) {
3302 return false;
3303 }
3304 break;
3305 case Op_AbsVF:
3306 case Op_NegVF:
3307 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3308 return false; // 512bit vandps and vxorps are not available
3309 }
3310 break;
3311 case Op_AbsVD:
3312 case Op_NegVD:
3313 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3314 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3315 }
3316 break;
3317 case Op_RotateRightV:
3318 case Op_RotateLeftV:
3319 if (bt != T_INT && bt != T_LONG) {
3320 return false;
3321 } // fallthrough
3322 case Op_MacroLogicV:
3323 if (!VM_Version::supports_evex() ||
3324 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3325 return false;
3326 }
3327 break;
3328 case Op_ClearArray:
3329 case Op_VectorMaskGen:
3330 case Op_VectorCmpMasked:
3331 if (!VM_Version::supports_avx512bw()) {
3332 return false;
3333 }
3334 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3335 return false;
3336 }
3337 break;
3338 case Op_LoadVectorMasked:
3339 case Op_StoreVectorMasked:
3340 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3341 return false;
3342 }
3343 break;
3344 case Op_UMinV:
3345 case Op_UMaxV:
3346 if (UseAVX == 0) {
3347 return false;
3348 }
3349 break;
3350 case Op_UMinReductionV:
3351 case Op_UMaxReductionV:
3352 if (UseAVX == 0) {
3353 return false;
3354 }
3355 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3356 return false;
3357 }
3358 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3359 return false;
3360 }
3361 break;
3362 case Op_MaxV:
3363 case Op_MinV:
3364 if (UseSSE < 4 && is_integral_type(bt)) {
3365 return false;
3366 }
3367 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3368 // Float/Double intrinsics are enabled for AVX family currently.
3369 if (UseAVX == 0) {
3370 return false;
3371 }
3372 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3373 return false;
3374 }
3375 }
3376 break;
3377 case Op_CallLeafVector:
3378 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3379 return false;
3380 }
3381 break;
3382 case Op_AddReductionVI:
3383 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3384 return false;
3385 }
3386 // fallthrough
3387 case Op_AndReductionV:
3388 case Op_OrReductionV:
3389 case Op_XorReductionV:
3390 if (is_subword_type(bt) && (UseSSE < 4)) {
3391 return false;
3392 }
3393 break;
3394 case Op_MinReductionV:
3395 case Op_MaxReductionV:
3396 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3397 return false;
3398 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3399 return false;
3400 }
3401 // Float/Double intrinsics enabled for AVX family.
3402 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3403 return false;
3404 }
3405 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3406 return false;
3407 }
3408 break;
3409 case Op_VectorBlend:
3410 if (UseAVX == 0 && size_in_bits < 128) {
3411 return false;
3412 }
3413 break;
3414 case Op_VectorTest:
3415 if (UseSSE < 4) {
3416 return false; // Implementation limitation
3417 } else if (size_in_bits < 32) {
3418 return false; // Implementation limitation
3419 }
3420 break;
3421 case Op_VectorLoadShuffle:
3422 case Op_VectorRearrange:
3423 if(vlen == 2) {
3424 return false; // Implementation limitation due to how shuffle is loaded
3425 } else if (size_in_bits == 256 && UseAVX < 2) {
3426 return false; // Implementation limitation
3427 }
3428 break;
3429 case Op_VectorLoadMask:
3430 case Op_VectorMaskCast:
3431 if (size_in_bits == 256 && UseAVX < 2) {
3432 return false; // Implementation limitation
3433 }
3434 // fallthrough
3435 case Op_VectorStoreMask:
3436 if (vlen == 2) {
3437 return false; // Implementation limitation
3438 }
3439 break;
3440 case Op_PopulateIndex:
3441 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3442 return false;
3443 }
3444 break;
3445 case Op_VectorCastB2X:
3446 case Op_VectorCastS2X:
3447 case Op_VectorCastI2X:
3448 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3449 return false;
3450 }
3451 break;
3452 case Op_VectorCastL2X:
3453 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3454 return false;
3455 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3456 return false;
3457 }
3458 break;
3459 case Op_VectorCastF2X: {
3460 // As per JLS section 5.1.3 narrowing conversion to sub-word types
3461 // happen after intermediate conversion to integer and special handling
3462 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
3463 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3464 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3465 return false;
3466 }
3467 }
3468 // fallthrough
3469 case Op_VectorCastD2X:
3470 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3471 return false;
3472 }
3473 break;
3474 case Op_VectorCastF2HF:
3475 case Op_VectorCastHF2F:
3476 if (!VM_Version::supports_f16c() &&
3477 ((!VM_Version::supports_evex() ||
3478 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3479 return false;
3480 }
3481 break;
3482 case Op_RoundVD:
3483 if (!VM_Version::supports_avx512dq()) {
3484 return false;
3485 }
3486 break;
3487 case Op_MulReductionVI:
3488 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3489 return false;
3490 }
3491 break;
3492 case Op_LoadVectorGatherMasked:
3493 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3494 return false;
3495 }
3496 if (is_subword_type(bt) &&
3497 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3498 (size_in_bits < 64) ||
3499 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3500 return false;
3501 }
3502 break;
3503 case Op_StoreVectorScatterMasked:
3504 case Op_StoreVectorScatter:
3505 if (is_subword_type(bt)) {
3506 return false;
3507 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3508 return false;
3509 }
3510 // fallthrough
3511 case Op_LoadVectorGather:
3512 if (!is_subword_type(bt) && size_in_bits == 64) {
3513 return false;
3514 }
3515 if (is_subword_type(bt) && size_in_bits < 64) {
3516 return false;
3517 }
3518 break;
3519 case Op_SaturatingAddV:
3520 case Op_SaturatingSubV:
3521 if (UseAVX < 1) {
3522 return false; // Implementation limitation
3523 }
3524 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3525 return false;
3526 }
3527 break;
3528 case Op_SelectFromTwoVector:
3529 if (size_in_bits < 128) {
3530 return false;
3531 }
3532 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3533 return false;
3534 }
3535 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3536 return false;
3537 }
3538 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3539 return false;
3540 }
3541 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3542 return false;
3543 }
3544 break;
3545 case Op_MaskAll:
3546 if (!VM_Version::supports_evex()) {
3547 return false;
3548 }
3549 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3550 return false;
3551 }
3552 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3553 return false;
3554 }
3555 break;
3556 case Op_VectorMaskCmp:
3557 if (vlen < 2 || size_in_bits < 32) {
3558 return false;
3559 }
3560 break;
3561 case Op_CompressM:
3562 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3563 return false;
3564 }
3565 break;
3566 case Op_CompressV:
3567 case Op_ExpandV:
3568 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3569 return false;
3570 }
3571 if (size_in_bits < 128 ) {
3572 return false;
3573 }
3574 case Op_VectorLongToMask:
3575 if (UseAVX < 1) {
3576 return false;
3577 }
3578 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3579 return false;
3580 }
3581 break;
3582 case Op_SignumVD:
3583 case Op_SignumVF:
3584 if (UseAVX < 1) {
3585 return false;
3586 }
3587 break;
3588 case Op_PopCountVI:
3589 case Op_PopCountVL: {
3590 if (!is_pop_count_instr_target(bt) &&
3591 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3592 return false;
3593 }
3594 }
3595 break;
3596 case Op_ReverseV:
3597 case Op_ReverseBytesV:
3598 if (UseAVX < 2) {
3599 return false;
3600 }
3601 break;
3602 case Op_CountTrailingZerosV:
3603 case Op_CountLeadingZerosV:
3604 if (UseAVX < 2) {
3605 return false;
3606 }
3607 break;
3608 }
3609 return true; // Per default match rules are supported.
3610 }
3611
3612 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3613 // ADLC based match_rule_supported routine checks for the existence of pattern based
3614 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
3615 // of their non-masked counterpart with mask edge being the differentiator.
3616 // This routine does a strict check on the existence of masked operation patterns
3617 // by returning a default false value for all the other opcodes apart from the
3618 // ones whose masked instruction patterns are defined in this file.
3619 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3620 return false;
3621 }
3622
3623 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3624 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3625 return false;
3626 }
3627 switch(opcode) {
3628 // Unary masked operations
3629 case Op_AbsVB:
3630 case Op_AbsVS:
3631 if(!VM_Version::supports_avx512bw()) {
3632 return false; // Implementation limitation
3633 }
3634 case Op_AbsVI:
3635 case Op_AbsVL:
3636 return true;
3637
3638 // Ternary masked operations
3639 case Op_FmaVF:
3640 case Op_FmaVD:
3641 return true;
3642
3643 case Op_MacroLogicV:
3644 if(bt != T_INT && bt != T_LONG) {
3645 return false;
3646 }
3647 return true;
3648
3649 // Binary masked operations
3650 case Op_AddVB:
3651 case Op_AddVS:
3652 case Op_SubVB:
3653 case Op_SubVS:
3654 case Op_MulVS:
3655 case Op_LShiftVS:
3656 case Op_RShiftVS:
3657 case Op_URShiftVS:
3658 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3659 if (!VM_Version::supports_avx512bw()) {
3660 return false; // Implementation limitation
3661 }
3662 return true;
3663
3664 case Op_MulVL:
3665 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3666 if (!VM_Version::supports_avx512dq()) {
3667 return false; // Implementation limitation
3668 }
3669 return true;
3670
3671 case Op_AndV:
3672 case Op_OrV:
3673 case Op_XorV:
3674 case Op_RotateRightV:
3675 case Op_RotateLeftV:
3676 if (bt != T_INT && bt != T_LONG) {
3677 return false; // Implementation limitation
3678 }
3679 return true;
3680
3681 case Op_VectorLoadMask:
3682 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3683 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3684 return false;
3685 }
3686 return true;
3687
3688 case Op_AddVI:
3689 case Op_AddVL:
3690 case Op_AddVF:
3691 case Op_AddVD:
3692 case Op_SubVI:
3693 case Op_SubVL:
3694 case Op_SubVF:
3695 case Op_SubVD:
3696 case Op_MulVI:
3697 case Op_MulVF:
3698 case Op_MulVD:
3699 case Op_DivVF:
3700 case Op_DivVD:
3701 case Op_SqrtVF:
3702 case Op_SqrtVD:
3703 case Op_LShiftVI:
3704 case Op_LShiftVL:
3705 case Op_RShiftVI:
3706 case Op_RShiftVL:
3707 case Op_URShiftVI:
3708 case Op_URShiftVL:
3709 case Op_LoadVectorMasked:
3710 case Op_StoreVectorMasked:
3711 case Op_LoadVectorGatherMasked:
3712 case Op_StoreVectorScatterMasked:
3713 return true;
3714
3715 case Op_UMinV:
3716 case Op_UMaxV:
3717 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3718 return false;
3719 } // fallthrough
3720 case Op_MaxV:
3721 case Op_MinV:
3722 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3723 return false; // Implementation limitation
3724 }
3725 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3726 return false; // Implementation limitation
3727 }
3728 return true;
3729 case Op_SaturatingAddV:
3730 case Op_SaturatingSubV:
3731 if (!is_subword_type(bt)) {
3732 return false;
3733 }
3734 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3735 return false; // Implementation limitation
3736 }
3737 return true;
3738
3739 case Op_VectorMaskCmp:
3740 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3741 return false; // Implementation limitation
3742 }
3743 return true;
3744
3745 case Op_VectorRearrange:
3746 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3747 return false; // Implementation limitation
3748 }
3749 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3750 return false; // Implementation limitation
3751 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3752 return false; // Implementation limitation
3753 }
3754 return true;
3755
3756 // Binary Logical operations
3757 case Op_AndVMask:
3758 case Op_OrVMask:
3759 case Op_XorVMask:
3760 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3761 return false; // Implementation limitation
3762 }
3763 return true;
3764
3765 case Op_PopCountVI:
3766 case Op_PopCountVL:
3767 if (!is_pop_count_instr_target(bt)) {
3768 return false;
3769 }
3770 return true;
3771
3772 case Op_MaskAll:
3773 return true;
3774
3775 case Op_CountLeadingZerosV:
3776 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3777 return true;
3778 }
3779 default:
3780 return false;
3781 }
3782 }
3783
3784 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3785 return false;
3786 }
3787
3788 // Return true if Vector::rearrange needs preparation of the shuffle argument
3789 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3790 switch (elem_bt) {
3791 case T_BYTE: return false;
3792 case T_SHORT: return !VM_Version::supports_avx512bw();
3793 case T_INT: return !VM_Version::supports_avx();
3794 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3795 default:
3796 ShouldNotReachHere();
3797 return false;
3798 }
3799 }
3800
3801 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3802 // Prefer predicate if the mask type is "TypePVectMask".
3803 return vt->isa_pvectmask() != nullptr;
3804 }
3805
3806 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3807 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3808 bool legacy = (generic_opnd->opcode() == LEGVEC);
3809 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3810 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3811 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3812 return new legVecZOper();
3813 }
3814 if (legacy) {
3815 switch (ideal_reg) {
3816 case Op_VecS: return new legVecSOper();
3817 case Op_VecD: return new legVecDOper();
3818 case Op_VecX: return new legVecXOper();
3819 case Op_VecY: return new legVecYOper();
3820 case Op_VecZ: return new legVecZOper();
3821 }
3822 } else {
3823 switch (ideal_reg) {
3824 case Op_VecS: return new vecSOper();
3825 case Op_VecD: return new vecDOper();
3826 case Op_VecX: return new vecXOper();
3827 case Op_VecY: return new vecYOper();
3828 case Op_VecZ: return new vecZOper();
3829 }
3830 }
3831 ShouldNotReachHere();
3832 return nullptr;
3833 }
3834
3835 bool Matcher::is_reg2reg_move(MachNode* m) {
3836 switch (m->rule()) {
3837 case MoveVec2Leg_rule:
3838 case MoveLeg2Vec_rule:
3839 case MoveF2VL_rule:
3840 case MoveF2LEG_rule:
3841 case MoveVL2F_rule:
3842 case MoveLEG2F_rule:
3843 case MoveD2VL_rule:
3844 case MoveD2LEG_rule:
3845 case MoveVL2D_rule:
3846 case MoveLEG2D_rule:
3847 return true;
3848 default:
3849 return false;
3850 }
3851 }
3852
3853 bool Matcher::is_generic_vector(MachOper* opnd) {
3854 switch (opnd->opcode()) {
3855 case VEC:
3856 case LEGVEC:
3857 return true;
3858 default:
3859 return false;
3860 }
3861 }
3862
3863 //------------------------------------------------------------------------
3864
3865 const RegMask* Matcher::predicate_reg_mask(void) {
3866 return &_VECTMASK_REG_mask;
3867 }
3868
3869 // Max vector size in bytes. 0 if not supported.
3870 int Matcher::vector_width_in_bytes(BasicType bt) {
3871 assert(is_java_primitive(bt), "only primitive type vectors");
3872 // SSE2 supports 128bit vectors for all types.
3873 // AVX2 supports 256bit vectors for all types.
3874 // AVX2/EVEX supports 512bit vectors for all types.
3875 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3876 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3877 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3878 size = (UseAVX > 2) ? 64 : 32;
3879 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3880 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3881 // Use flag to limit vector size.
3882 size = MIN2(size,(int)MaxVectorSize);
3883 // Minimum 2 values in vector (or 4 for bytes).
3884 switch (bt) {
3885 case T_DOUBLE:
3886 case T_LONG:
3887 if (size < 16) return 0;
3888 break;
3889 case T_FLOAT:
3890 case T_INT:
3891 if (size < 8) return 0;
3892 break;
3893 case T_BOOLEAN:
3894 if (size < 4) return 0;
3895 break;
3896 case T_CHAR:
3897 if (size < 4) return 0;
3898 break;
3899 case T_BYTE:
3900 if (size < 4) return 0;
3901 break;
3902 case T_SHORT:
3903 if (size < 4) return 0;
3904 break;
3905 default:
3906 ShouldNotReachHere();
3907 }
3908 return size;
3909 }
3910
3911 // Limits on vector size (number of elements) loaded into vector.
3912 int Matcher::max_vector_size(const BasicType bt) {
3913 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3914 }
3915 int Matcher::min_vector_size(const BasicType bt) {
3916 int max_size = max_vector_size(bt);
3917 // Min size which can be loaded into vector is 4 bytes.
3918 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3919 // Support for calling svml double64 vectors
3920 if (bt == T_DOUBLE) {
3921 size = 1;
3922 }
3923 return MIN2(size,max_size);
3924 }
3925
3926 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3927 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3928 // by default on Cascade Lake
3929 if (VM_Version::is_default_intel_cascade_lake()) {
3930 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3931 }
3932 return Matcher::max_vector_size(bt);
3933 }
3934
3935 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3936 return -1;
3937 }
3938
3939 // Vector ideal reg corresponding to specified size in bytes
3940 uint Matcher::vector_ideal_reg(int size) {
3941 assert(MaxVectorSize >= size, "");
3942 switch(size) {
3943 case 4: return Op_VecS;
3944 case 8: return Op_VecD;
3945 case 16: return Op_VecX;
3946 case 32: return Op_VecY;
3947 case 64: return Op_VecZ;
3948 }
3949 ShouldNotReachHere();
3950 return 0;
3951 }
3952
3953 // Check for shift by small constant as well
3954 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3955 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3956 shift->in(2)->get_int() <= 3 &&
3957 // Are there other uses besides address expressions?
3958 !matcher->is_visited(shift)) {
3959 address_visited.set(shift->_idx); // Flag as address_visited
3960 mstack.push(shift->in(2), Matcher::Visit);
3961 Node *conv = shift->in(1);
3962 // Allow Matcher to match the rule which bypass
3963 // ConvI2L operation for an array index on LP64
3964 // if the index value is positive.
3965 if (conv->Opcode() == Op_ConvI2L &&
3966 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3967 // Are there other uses besides address expressions?
3968 !matcher->is_visited(conv)) {
3969 address_visited.set(conv->_idx); // Flag as address_visited
3970 mstack.push(conv->in(1), Matcher::Pre_Visit);
3971 } else {
3972 mstack.push(conv, Matcher::Pre_Visit);
3973 }
3974 return true;
3975 }
3976 return false;
3977 }
3978
3979 // This function identifies sub-graphs in which a 'load' node is
3980 // input to two different nodes, and such that it can be matched
3981 // with BMI instructions like blsi, blsr, etc.
3982 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3983 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3984 // refers to the same node.
3985 //
3986 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3987 // This is a temporary solution until we make DAGs expressible in ADL.
3988 template<typename ConType>
3989 class FusedPatternMatcher {
3990 Node* _op1_node;
3991 Node* _mop_node;
3992 int _con_op;
3993
3994 static int match_next(Node* n, int next_op, int next_op_idx) {
3995 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3996 return -1;
3997 }
3998
3999 if (next_op_idx == -1) { // n is commutative, try rotations
4000 if (n->in(1)->Opcode() == next_op) {
4001 return 1;
4002 } else if (n->in(2)->Opcode() == next_op) {
4003 return 2;
4004 }
4005 } else {
4006 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
4007 if (n->in(next_op_idx)->Opcode() == next_op) {
4008 return next_op_idx;
4009 }
4010 }
4011 return -1;
4012 }
4013
4014 public:
4015 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4016 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4017
4018 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4019 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4020 typename ConType::NativeType con_value) {
4021 if (_op1_node->Opcode() != op1) {
4022 return false;
4023 }
4024 if (_mop_node->outcnt() > 2) {
4025 return false;
4026 }
4027 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4028 if (op1_op2_idx == -1) {
4029 return false;
4030 }
4031 // Memory operation must be the other edge
4032 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4033
4034 // Check that the mop node is really what we want
4035 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4036 Node* op2_node = _op1_node->in(op1_op2_idx);
4037 if (op2_node->outcnt() > 1) {
4038 return false;
4039 }
4040 assert(op2_node->Opcode() == op2, "Should be");
4041 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4042 if (op2_con_idx == -1) {
4043 return false;
4044 }
4045 // Memory operation must be the other edge
4046 int op2_mop_idx = (op2_con_idx & 1) + 1;
4047 // Check that the memory operation is the same node
4048 if (op2_node->in(op2_mop_idx) == _mop_node) {
4049 // Now check the constant
4050 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4051 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4052 return true;
4053 }
4054 }
4055 }
4056 return false;
4057 }
4058 };
4059
4060 static bool is_bmi_pattern(Node* n, Node* m) {
4061 assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
4062 if (n != nullptr && m != nullptr) {
4063 if (m->Opcode() == Op_LoadI) {
4064 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4065 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4066 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4067 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4068 } else if (m->Opcode() == Op_LoadL) {
4069 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4070 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4071 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4072 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4073 }
4074 }
4075 return false;
4076 }
4077
4078 // Should the matcher clone input 'm' of node 'n'?
4079 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4080 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4081 if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
4082 mstack.push(m, Visit);
4083 return true;
4084 }
4085 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4086 mstack.push(m, Visit); // m = ShiftCntV
4087 return true;
4088 }
4089 if (is_encode_and_store_pattern(n, m)) {
4090 mstack.push(m, Visit);
4091 return true;
4092 }
4093 return false;
4094 }
4095
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
//
// Returns true when this function has flagged 'm' in 'address_visited' and
// pushed the inputs of 'm' onto 'mstack' itself; returns false otherwise.
// Two shapes are handled: an AddP with a constant offset, and an AddP whose
// offset is accepted by clone_shift().
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        !adr->in(AddPNode::Offset)->is_Con() &&
        off->get_long() == (int) (off->get_long()) && // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      // Fold the inner AddP as well: flag it and push its inputs directly
      // instead of pushing the inner AddP node itself.
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        // clone_shift() declined the offset; visit it as an ordinary input.
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    // Non-constant offset that clone_shift() accepted.
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
4137
4138 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4139 switch (bt) {
4140 case BoolTest::eq:
4141 return Assembler::eq;
4142 case BoolTest::ne:
4143 return Assembler::neq;
4144 case BoolTest::le:
4145 case BoolTest::ule:
4146 return Assembler::le;
4147 case BoolTest::ge:
4148 case BoolTest::uge:
4149 return Assembler::nlt;
4150 case BoolTest::lt:
4151 case BoolTest::ult:
4152 return Assembler::lt;
4153 case BoolTest::gt:
4154 case BoolTest::ugt:
4155 return Assembler::nle;
4156 default : ShouldNotReachHere(); return Assembler::_false;
4157 }
4158 }
4159
4160 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4161 switch (bt) {
4162 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4163 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4164 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4165 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4166 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4167 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4168 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4169 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4170 }
4171 }
4172
4173 // Helper methods for MachSpillCopyNode::implementation().
4174 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4175 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4176 assert(ireg == Op_VecS || // 32bit vector
4177 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4178 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4179 "no non-adjacent vector moves" );
4180 if (masm) {
4181 switch (ireg) {
4182 case Op_VecS: // copy whole register
4183 case Op_VecD:
4184 case Op_VecX:
4185 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4186 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4187 } else {
4188 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4189 }
4190 break;
4191 case Op_VecY:
4192 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4193 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4194 } else {
4195 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4196 }
4197 break;
4198 case Op_VecZ:
4199 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4200 break;
4201 default:
4202 ShouldNotReachHere();
4203 }
4204 #ifndef PRODUCT
4205 } else {
4206 switch (ireg) {
4207 case Op_VecS:
4208 case Op_VecD:
4209 case Op_VecX:
4210 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4211 break;
4212 case Op_VecY:
4213 case Op_VecZ:
4214 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4215 break;
4216 default:
4217 ShouldNotReachHere();
4218 }
4219 #endif
4220 }
4221 }
4222
// Vector spill between a register and the stack slot at [rsp + stack_offset].
// With a non-null 'masm' the load/store is emitted; otherwise (non-PRODUCT
// builds only) a textual form is printed to 'st'.
//   is_load      - true: stack -> register, false: register -> stack
//   stack_offset - displacement from rsp of the spill slot
//   reg          - register index into Matcher::_regEncode / Matcher::regName
//   ireg         - ideal vector register kind (Op_VecS .. Op_VecZ)
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st) {
  if (masm) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // UseAVX >= 3 without AVX512VL: zero the register, then insert the
          // loaded 128 bits at position 0.
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
        break;
      case Op_VecY:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // Same fallback as for Op_VecX, using the 256-bit insert.
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          // UseAVX >= 3 without AVX512VL: store via the extract form, position 0.
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecY:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#ifndef PRODUCT
  } else {
    // Format-only path: print a description of the spill. The mnemonic is
    // chosen from 'ireg' alone; the feature-dependent fallbacks used by the
    // emitting path above are not reflected here.
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
       case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
       case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
}
4329
4330 template <class T>
4331 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4332 int size = type2aelembytes(bt) * len;
4333 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4334 for (int i = 0; i < len; i++) {
4335 int offset = i * type2aelembytes(bt);
4336 switch (bt) {
4337 case T_BYTE: val->at(i) = con; break;
4338 case T_SHORT: {
4339 jshort c = con;
4340 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4341 break;
4342 }
4343 case T_INT: {
4344 jint c = con;
4345 memcpy(val->adr_at(offset), &c, sizeof(jint));
4346 break;
4347 }
4348 case T_LONG: {
4349 jlong c = con;
4350 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4351 break;
4352 }
4353 case T_FLOAT: {
4354 jfloat c = con;
4355 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4356 break;
4357 }
4358 case T_DOUBLE: {
4359 jdouble c = con;
4360 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4361 break;
4362 }
4363 default: assert(false, "%s", type2name(bt));
4364 }
4365 }
4366 return val;
4367 }
4368
4369 static inline jlong high_bit_set(BasicType bt) {
4370 switch (bt) {
4371 case T_BYTE: return 0x8080808080808080;
4372 case T_SHORT: return 0x8000800080008000;
4373 case T_INT: return 0x8000000080000000;
4374 case T_LONG: return 0x8000000000000000;
4375 default:
4376 ShouldNotReachHere();
4377 return 0;
4378 }
4379 }
4380
#ifndef PRODUCT
// Debug printing (non-PRODUCT builds only): describes the padding nop and the
// number of bytes it occupies.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
4386
// Emits the padding by asking the assembler for a nop of '_count'.
void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
  __ nop(_count);
}
4390
// The node's size is exactly the requested padding count.
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}
4394
#ifndef PRODUCT
// Debug printing (non-PRODUCT builds only): marks the breakpoint in the listing.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif
4400
// Emits an int3 instruction as the breakpoint.
void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
  __ int3();
}
4404
// No fixed size is hard-coded here; defers to the generic MachNode::size().
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
4408
4409 %}
4410
4411 //----------ENCODING BLOCK-----------------------------------------------------
4412 // This block specifies the encoding classes used by the compiler to
4413 // output byte streams. Encoding classes are parameterized macros
4414 // used by Machine Instruction Nodes in order to generate the bit
4415 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
4419 // which returns its register number when queried. CONST_INTER causes
4420 // an operand to generate a function which returns the value of the
4421 // constant when queried. MEMORY_INTER causes an operand to generate
4422 // four functions which return the Base Register, the Index Register,
4423 // the Scale Value, and the Offset Value of the operand when queried.
4424 // COND_INTER causes an operand to generate six functions which return
4425 // the encoding code (ie - encoding bits for the instruction)
4426 // associated with each basic boolean condition for a conditional
4427 // instruction.
4428 //
4429 // Instructions specify two basic values for encoding. Again, a
4430 // function is available to check if the constant displacement is an
4431 // oop. They use the ins_encode keyword to specify their encoding
4432 // classes (which must be a sequence of enc_class names, and their
4433 // parameters, specified in the encoding block), and they use the
4434 // opcode keyword to specify, in order, their primary, secondary, and
4435 // tertiary opcode. Only the opcode sections which a particular
4436 // instruction needs for encoding need to be specified.
4437 encode %{
enc_class cdql_enc(no_rax_rdx_RegI div)
%{
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_int
  //         reg: divisor                          -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_int
  //         rdx: remainder (= rax irem reg)       0
  //
  //  Code sequence:
  //
  //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
  //    5:   75 07/08                jne    e <normal>
  //    7:   33 d2                   xor    %edx,%edx
  //  [div >= 8 -> offset + 1]
  //  [REX_B]
  //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
  //    c:   74 03/04                je     11 <done>
  // 000000000000000e <normal>:
  //    e:   99                      cltd
  //  [div >= 8 -> offset + 1]
  //  [REX_B]
  //    f:   f7 f9                   idiv   $div
  // 0000000000000011 <done>:
  Label normal;
  Label done;

  // cmp    $0x80000000,%eax
  __ cmpl(as_Register(RAX_enc), 0x80000000);

  // jne    e <normal>
  __ jccb(Assembler::notEqual, normal);

  // xor    %edx,%edx
  // (the special-case remainder; the branch to <done> below skips the idiv)
  __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

  // cmp    $0xffffffffffffffff,$div
  __ cmpl($div$$Register, -1);

  // je     11 <done>
  __ jccb(Assembler::equal, done);

  // <normal>
  // cltd
  __ bind(normal);
  __ cdql();

  // idivl
  // <done>
  __ idivl($div$$Register);
  __ bind(done);
%}
4494
enc_class cdqq_enc(no_rax_rdx_RegL div)
%{
  // Full implementation of Java ldiv and lrem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor                          -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  //
  //  Code sequence:
  //
  //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
  //    7:   00 00 80
  //    a:   48 39 d0                cmp    %rdx,%rax
  //    d:   75 08                   jne    17 <normal>
  //    f:   33 d2                   xor    %edx,%edx
  //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
  //   15:   74 05                   je     1c <done>
  // 0000000000000017 <normal>:
  //   17:   48 99                   cqto
  //   19:   48 f7 f9                idiv   $div
  // 000000000000001c <done>:
  Label normal;
  Label done;

  // mov    $0x8000000000000000,%rdx
  // (rdx is used as scratch for the min_long comparison)
  __ mov64(as_Register(RDX_enc), 0x8000000000000000);

  // cmp    %rdx,%rax
  __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

  // jne    17 <normal>
  __ jccb(Assembler::notEqual, normal);

  // xor    %edx,%edx
  // (the special-case remainder; the branch to <done> below skips the idiv)
  __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

  // cmp    $0xffffffffffffffff,$div
  __ cmpq($div$$Register, -1);

  // je     1c <done>
  __ jccb(Assembler::equal, done);

  // <normal>
  // cqto
  __ bind(normal);
  __ cdqq();

  // idivq
  // <done>
  __ idivq($div$$Register);
  __ bind(done);
%}
4552
// Conditionally emits vzeroupper. In debug builds the number of bytes emitted
// is checked against clear_avx_size().
enc_class clear_avx %{
  DEBUG_ONLY(int off0 = __ offset());
  if (generate_vzeroupper(Compile::current())) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }
  DEBUG_ONLY(int off1 = __ offset());
  assert(off1 - off0 == clear_avx_size(), "correct size prediction");
%}
4564
// Call from compiled Java code into the runtime: the target address is
// materialized in r10 and called indirectly, followed by the post-call nop.
enc_class Java_To_Runtime(method meth) %{
  __ lea(r10, RuntimeAddress((address)$meth$$method));
  __ call(r10);
  __ post_call_nop();
%}
4570
enc_class Java_Static_Call(method meth)
%{
  // JAVA STATIC CALL
  // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
  // determine who we intended to call.
  //
  // Three cases, selected by _method:
  //  - no _method:                         plain call to a runtime address
  //  - _ensureMaterializedForStackWalk:    call elided, replaced by a 5-byte nop
  //  - otherwise:                          relocated call plus a to-interpreter stub
  if (!_method) {
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
  } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
    // The NOP here is purely to ensure that eliding a call to
    // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
    __ nop(5);
    __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
  } else {
    int method_index = resolved_method_index(masm);
    RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                : static_call_Relocation::spec(method_index);
    // Remember where the call starts, both as an address and as a code
    // offset; the stub emission below refers back to it.
    address mark = __ pc();
    int call_offset = __ offset();
    __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
    if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
      // Calls of the same statically bound method can share
      // a stub to the interpreter.
      __ code()->shared_stub_to_interp_for(_method, call_offset);
    } else {
      // Emit stubs for static call.
      address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
      __ clear_inst_mark();
      if (stub == nullptr) {
        // Stub emission failed: record the failure and stop emitting
        // (the post-call nop below is skipped as well).
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  }
  __ post_call_nop();
%}
4606
// Java dynamic call: emitted as an inline-cache call (ic_call) to the target
// with the resolved method index, followed by the post-call nop.
enc_class Java_Dynamic_Call(method meth) %{
  __ ic_call((address)$meth$$method, resolved_method_index(masm));
  __ post_call_nop();
%}
4611
// Code emitted after a call returns:
//  1. with VerifyStackAtCalls, check that the stack cookie is still in place;
//  2. for calls returning an inline type as fields, initialize the null-marker
//     projection and, if the return value is used, normalize rax.
enc_class call_epilog %{
  if (VerifyStackAtCalls) {
    // Check that stack depth is unchanged: find magic cookie on stack
    int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
    Label L;
    __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
    __ jccb(Assembler::equal, L);
    // Die if stack mismatch
    __ int3();
    __ bind(L);
  }
  if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
    // The last return value is not set by the callee but used to pass the null marker to compiled code.
    // Search for the corresponding projection, get the register and emit code that initializes it.
    uint con = (tf()->range_cc()->cnt() - 1);
    for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
      ProjNode* proj = fast_out(i)->as_Proj();
      if (proj->_con == con) {
        // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
        OptoReg::Name optoReg = ra_->get_reg_first(proj);
        VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
        // If the projection lives on the stack, compute the marker in
        // rscratch1 and store it to the slot afterwards.
        Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
        __ testq(rax, rax);
        __ setb(Assembler::notZero, toReg);
        __ movzbl(toReg, toReg);
        if (reg->is_stack()) {
          int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
          __ movq(Address(rsp, st_off), toReg);
        }
        break;
      }
    }
    if (return_value_is_used()) {
      // An inline type is returned as fields in multiple registers.
      // Rax either contains an oop if the inline type is buffered or a pointer
      // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
      // if the lowest bit is set to allow C2 to use the oop after null checking.
      // rax &= (rax & 1) - 1
      __ movptr(rscratch1, rax);
      __ andptr(rscratch1, 0x1);
      __ subptr(rscratch1, 0x1);
      __ andptr(rax, rscratch1);
    }
  }
%}
4657
4658 %}
4659
4660 //----------FRAME--------------------------------------------------------------
4661 // Definition of frame structure and management information.
4662 //
4663 // S T A C K L A Y O U T Allocators stack-slot number
4664 // | (to get allocators register number
4665 // G Owned by | | v add OptoReg::stack0())
4666 // r CALLER | |
4667 // o | +--------+ pad to even-align allocators stack-slot
4668 // w V | pad0 | numbers; owned by CALLER
4669 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4670 // h ^ | in | 5
4671 // | | args | 4 Holes in incoming args owned by SELF
4672 // | | | | 3
4673 // | | +--------+
4674 // V | | old out| Empty on Intel, window on Sparc
4675 // | old |preserve| Must be even aligned.
4676 // | SP-+--------+----> Matcher::_old_SP, even aligned
4677 // | | in | 3 area for Intel ret address
4678 // Owned by |preserve| Empty on Sparc.
4679 // SELF +--------+
4680 // | | pad2 | 2 pad to align old SP
4681 // | +--------+ 1
4682 // | | locks | 0
4683 // | +--------+----> OptoReg::stack0(), even aligned
4684 // | | pad1 | 11 pad to align new SP
4685 // | +--------+
4686 // | | | 10
4687 // | | spills | 9 spills
4688 // V | | 8 (pad0 slot for callee)
4689 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4690 // ^ | out | 7
4691 // | | args | 6 Holes in outgoing args owned by CALLEE
4692 // Owned by +--------+
4693 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4694 // | new |preserve| Must be even-aligned.
4695 // | SP-+--------+----> Matcher::_new_SP, even aligned
4696 // | | |
4697 //
4698 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4699 // known from SELF's arguments and the Java calling convention.
4700 // Region 6-7 is determined per call site.
4701 // Note 2: If the calling convention leaves holes in the incoming argument
4702 // area, those holes are owned by SELF. Holes in the outgoing area
4703 // are owned by the CALLEE. Holes should not be necessary in the
4704 // incoming area, as the Java calling convention is completely under
4705 // the control of the AD file. Doubles can be sorted and packed to
4706 // avoid holes. Holes in the outgoing arguments may be necessary for
4707 // varargs C calling conventions.
4708 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4709 // even aligned with pad0 as needed.
4710 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4711 // region 6-11 is even aligned; it may be padded out more so that
4712 // the region from SP to FP meets the minimum stack alignment.
4713 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4714 // alignment. Region 11, pad1, may be dynamically extended so that
4715 // SP meets the minimum alignment.
4716
frame
%{
  // Register that is part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  // The lo/hi tables below are indexed by 'ideal_reg'; per the entry comments
  // index 2 is Op_RegN and the last index is Op_RegL. The two leading zero
  // entries are placeholders for the indices below Op_RegN.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return location.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    // High half of the return location; OptoReg::Bad where there is none.
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4782
4783 //----------ATTRIBUTES---------------------------------------------------------
4784 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute (default 0)

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute (default 100)
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction

// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
// Defaults to false.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
4804
4805 //----------OPERANDS-----------------------------------------------------------
4806 // Operand definitions must precede instruction definitions for correct parsing
4807 // in the ADLC because operands constitute user defined types which are used in
4808 // instruction definitions.
4809
4810 //----------Simple Operands----------------------------------------------------
4811 // Immediate Operands
// Integer Immediate: matches any int constant (ConI), with no predicate.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4821
// Constant for test vs zero: int constant equal to 0.
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4832
// Constant for increment: int constant equal to 1.
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4843
// Constant for decrement: int constant equal to -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4854
// Int constant equal to 2.
operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4864
// Int constant equal to 4.
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4874
// Int constant equal to 8.
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4884
// Valid scale values for addressing modes: int constant in [0, 3].
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4894
// Unsigned 7-bit int immediate: int constant in [0, 0x7F].
operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4904
// Signed 8-bit int immediate: int constant in [-0x80, 0x7F].
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4914
// Unsigned 8-bit int immediate: int constant in [0, 255].
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4924
// Signed 16-bit int immediate: int constant in [-32768, 32767].
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4934
// Int Immediate non-negative: int constant >= 0.
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4945
// Pointer Immediate: matches any pointer constant (ConP), with no predicate.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4955
// Null Pointer Immediate: pointer constant whose value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4966
// Narrow Pointer Immediate: matches any ConN constant, with no predicate.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4975
// Narrow Klass Immediate: matches any ConNKlass constant, with no predicate.
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4983
// Narrow Null Pointer Immediate: ConN constant whose narrow value is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4993
// Pointer constant that carries no relocation and has no bits set at or above
// bit 31 (n->get_ptr() >> 31 == 0).
operand immP31()
%{
  predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5004
5005
// Long Immediate: matches any long constant (ConL), with no predicate.
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
5015
// Long Immediate 8-bit: long constant in [-0x80, 0x7F].
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5026
// Long Immediate 32-bit unsigned: long constant that is unchanged by a
// round trip through 'unsigned int'.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
5037
// Long Immediate 32-bit signed: long constant that is unchanged by a
// round trip through 'int'.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5048
// Long constant whose value, viewed as unsigned (julong), satisfies
// is_power_of_2().
operand immL_Pow2()
%{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5058
// Long constant whose bitwise complement, viewed as unsigned (julong),
// satisfies is_power_of_2().
operand immL_NotPow2()
%{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5068
// Long Immediate zero: long constant equal to 0.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
5079
// Constant for increment: long constant equal to 1.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5089
// Constant for decrement: long constant equal to -1.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5099
// Long Immediate: low 32-bit mask, i.e. the long constant 0xFFFFFFFF.
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
5110
// Int Immediate: 2^n-1, positive. The constant must be > 0 and the constant
// plus one (computed as juint) must satisfy is_power_of_2().
operand immI_Pow2M1()
%{
  predicate((n->get_int() > 0)
            && is_power_of_2((juint)n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
5122
// Float Immediate zero: accepted only when jint_cast() of the value is 0
operand immF0() %{
  match(ConF);
  predicate(jint_cast(n->getf()) == 0);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5133
// Float Immediate: any ConF (no predicate)
operand immF() %{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5143
// Half Float Immediate: any ConH (no predicate)
operand immH() %{
  match(ConH);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5153
// Double Immediate zero: accepted only when jlong_cast() of the value is 0
operand immD0() %{
  match(ConD);
  predicate(jlong_cast(n->getd()) == 0);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5164
// Double Immediate: any ConD (no predicate)
operand immD() %{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5174
5175 // Immediates for special shifts (sign extend)
5176
// Int Immediate: the constant 16
operand immI_16() %{
  match(ConI);
  predicate(n->get_int() == 16);

  format %{ %}
  interface(CONST_INTER);
%}
5186
// Int Immediate: the constant 24
operand immI_24() %{
  match(ConI);
  predicate(n->get_int() == 24);

  format %{ %}
  interface(CONST_INTER);
%}
5195
// Int Immediate: the constant 255 (byte-wide mask)
operand immI_255() %{
  match(ConI);
  predicate(n->get_int() == 255);

  format %{ %}
  interface(CONST_INTER);
%}
5205
// Int Immediate: the constant 65535 (short-wide mask)
operand immI_65535() %{
  match(ConI);
  predicate(n->get_int() == 65535);

  format %{ %}
  interface(CONST_INTER);
%}
5215
// Long Immediate: the constant 255 (byte-wide mask)
operand immL_255() %{
  match(ConL);
  predicate(n->get_long() == 255);

  format %{ %}
  interface(CONST_INTER);
%}
5225
// Long Immediate: the constant 65535 (short-wide mask)
operand immL_65535() %{
  match(ConL);
  predicate(n->get_long() == 65535);

  format %{ %}
  interface(CONST_INTER);
%}
5235
// Pointer Immediate: address inside the AOT Runtime Constants area
operand immAOTRuntimeConstantsAddress() %{
  match(ConP);
  // Accept the pointer only if AOTRuntimeConstants reports that it contains it.
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5247
// Register operand matching RegVectMask, allocated in vectmask_reg
operand kReg() %{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);

  format %{ %}
  interface(REG_INTER);
%}
5255
// Register Operands
// Integer Register: allocated in int_reg
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  // The fixed-register integer operands are also accepted.
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5272
// Special Registers
// Integer operand allocated in int_rax_reg
operand rax_RegI() %{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
5283
// Special Registers
// Integer operand allocated in int_rbx_reg
operand rbx_RegI() %{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
5294
// Integer operand allocated in int_rcx_reg
operand rcx_RegI() %{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
5304
// Integer operand allocated in int_rdx_reg
operand rdx_RegI() %{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}
5314
// Integer operand allocated in int_rdi_reg
operand rdi_RegI() %{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}
5324
// Integer operand allocated in int_no_rax_rdx_reg
operand no_rax_rdx_RegI() %{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);

  // Of the fixed-register operands, only RBX, RCX and RDI are accepted.
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5336
// Integer operand allocated in int_no_rbp_r13_reg
operand no_rbp_r13_RegI() %{
  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  match(RegI);

  // The general and fixed-register integer operands are also accepted.
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5351
// Pointer Register: allocated in any_reg
operand any_RegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);

  // Every other pointer register operand listed here is accepted as well.
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5368
// Pointer Register: allocated in ptr_reg
operand rRegP() %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);

  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  // See the Q&A below for why rbp_RegP and r15_RegP are matched here.
  match(rbp_RegP);
  match(r15_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5383
// Register operand matching RegN, allocated in int_reg
operand rRegN()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
5391
5392 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5393 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5394 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5395 // The output of an instruction is controlled by the allocator, which respects
5396 // register class masks, not match rules. Unless an instruction mentions
5397 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5398 // by the allocator as an input.
5399 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5400 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5401 // result, RBP is not included in the output of the instruction either.
5402
// Pointer operand that must not use RBP, even when RBP is not
// holding the frame pointer. Allocated in ptr_reg_no_rbp.
operand no_rbp_RegP() %{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);

  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5416
// Special Registers
// Pointer operand allocated in ptr_rax_reg; returns a pointer value
operand rax_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5428
// Special Registers
// Narrow operand allocated in int_rax_reg; returns a compressed pointer value
operand rax_RegN() %{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}
5440
// Pointer operand allocated in ptr_rbx_reg. Used in AtomicAdd.
operand rbx_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5451
// Pointer operand allocated in ptr_rsi_reg
operand rsi_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5461
// Pointer operand allocated in ptr_rbp_reg
operand rbp_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5471
// Pointer operand allocated in ptr_rdi_reg. Used in rep stosq.
operand rdi_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5482
// Pointer operand allocated in ptr_r15_reg
operand r15_RegP() %{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5492
// Long Register: allocated in long_reg
operand rRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);

  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5503
// Special Registers
// Long operand allocated in long_no_rax_rdx_reg
operand no_rax_rdx_RegL() %{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5514
// Long operand allocated in long_rax_reg
operand rax_RegL() %{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
5524
// Long operand allocated in long_rcx_reg
operand rcx_RegL() %{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5534
// Long operand allocated in long_rdx_reg
operand rdx_RegL() %{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5544
// Long operand allocated in long_r11_reg
operand r11_RegL() %{
  constraint(ALLOC_IN_RC(long_r11_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5554
// Long operand allocated in long_no_rbp_r13_reg
operand no_rbp_r13_RegL() %{
  constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
  match(RegL);

  match(rRegL);
  match(rax_RegL);
  match(rcx_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5567
// Flags register (int_flags), used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}
5577
// Flags register (int_flags), used as output of FLOATING POINT compare
// instructions
operand rFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}
5587
// Flags register (int_flags) variant that is available only when the
// UseAPX + AVX10.2 combination is NOT in effect.
operand rFlagsRegUCF()
%{
  predicate(!(UseAPX && VM_Version::supports_avx10_2()));
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}
5596
// Flags register (int_flags) variant that is available only when both
// UseAPX is set and AVX10.2 is supported.
operand rFlagsRegUCFE()
%{
  predicate(UseAPX && VM_Version::supports_avx10_2());
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U_CFE" %}
  interface(REG_INTER);
%}
5605
// Float register operand, allocated in float_reg
operand regF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}
5614
// Float register operand, allocated in float_reg_legacy
operand legRegF()
%{
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}
5623
// Float register operand, allocated in float_reg_vl
operand vlRegF()
%{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}
5632
// Double register operand, allocated in double_reg
operand regD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5641
// Double register operand, allocated in double_reg_legacy
operand legRegD()
%{
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5650
// Double register operand, allocated in double_reg_vl
operand vlRegD()
%{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5659
5660 //----------Memory Operands----------------------------------------------------
5661 // Direct Memory Operand
5662 // operand direct(immP addr)
5663 // %{
5664 // match(addr);
5665
5666 // format %{ "[$addr]" %}
5667 // interface(MEMORY_INTER) %{
5668 // base(0xFFFFFFFF);
5669 // index(0x4);
5670 // scale(0x0);
5671 // disp($addr);
5672 // %}
5673 // %}
5674
// Indirect Memory Operand: [reg]
operand indirect(any_RegP reg) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp(0x0);
  %}
%}
5689
// Indirect Memory Plus Short Offset Operand: [reg + off], off is an immL8
operand indOffset8(any_RegP reg, immL8 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
5704
// Indirect Memory Plus Long Offset Operand: [reg + off], off is an immL32
operand indOffset32(any_RegP reg, immL32 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
5719
// Indirect Memory Plus Index Register Plus Offset Operand:
// [reg + lreg + off]
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);
  op_cost(10);

  format %{ "[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
5735
// Indirect Memory Plus Index Register Operand: [reg + lreg]
operand indIndex(any_RegP reg, rRegL lreg) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(10);

  format %{ "[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);  // No Scale
    disp(0x0);
  %}
%}
5751
// Indirect Memory Times Scale Plus Index Register:
// [reg + lreg << scale]
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));
  op_cost(10);

  format %{ "[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
5767
// Indirect Memory Times Scale Plus Positive Index Register:
// [reg + idx << scale], where idx is an int register widened via ConvI2L
operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale) %{
  // The long type reached via n->in(3)->in(1) (presumably the ConvI2L
  // node) must have a non-negative lower bound.
  predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL (ConvI2L idx) scale));
  op_cost(10);

  format %{ "[$reg + pos $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp(0x0);
  %}
%}
5783
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
// [reg + (lreg << scale) + off]
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  op_cost(10);

  format %{ "[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}
5799
// Indirect Memory Plus Positive Index Register Plus Offset Operand:
// [reg + idx + off], where idx is an int register widened via ConvI2L
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx) %{
  // The long type reached via n->in(2)->in(3) (presumably the ConvI2L
  // node) must have a non-negative lower bound.
  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (ConvI2L idx)) off);
  op_cost(10);

  format %{ "[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
5816
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset
// Operand: [reg + (idx << scale) + off], idx widened via ConvI2L
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) %{
  // The long type reached via n->in(2)->in(3)->in(1) (presumably the
  // ConvI2L node) must have a non-negative lower bound.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
  op_cost(10);

  format %{ "[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5833
// Indirect Narrow Oop Operand: [R12 + reg << 3]
// Only available with compressed oops and a shift of Address::times_8.
operand indCompressedOop(rRegN reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  match(DecodeN reg);
  op_cost(10);

  format %{ "[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
5849
// Indirect Narrow Oop Plus Offset Operand: [R12 + reg << 3 + off]
// Note: the x86 architecture has no "scale * index + offset" form without a
// base, so r12 cannot be freed even when CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  match(AddP (DecodeN reg) off);
  op_cost(10);

  format %{ "[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5867
// Indirect Memory Operand through a narrow oop register: [reg]
// Only available when CompressedOops::shift() is 0.
operand indirectNarrow(rRegN reg) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0);
  match(DecodeN reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp(0x0);
  %}
%}
5883
// Indirect Memory Plus Short Offset Operand through a narrow oop register:
// [reg + off], off is an immL8. Only when CompressedOops::shift() is 0.
operand indOffset8Narrow(rRegN reg, immL8 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0);
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
5899
// Indirect Memory Plus Long Offset Operand through a narrow oop register:
// [reg + off], off is an immL32. Only when CompressedOops::shift() is 0.
operand indOffset32Narrow(rRegN reg, immL32 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0);
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
5915
// Indirect Memory Plus Index Register Plus Offset Operand through a narrow
// oop register: [reg + lreg + off]. Only when CompressedOops::shift() is 0.
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0);
  match(AddP (AddP (DecodeN reg) lreg) off);
  op_cost(10);

  format %{ "[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
5932
// Indirect Memory Plus Index Register Operand through a narrow oop register:
// [reg + lreg]. Only when CompressedOops::shift() is 0.
operand indIndexNarrow(rRegN reg, rRegL lreg) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0);
  match(AddP (DecodeN reg) lreg);
  op_cost(10);

  format %{ "[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);  // No Scale
    disp(0x0);
  %}
%}
5949
// Indirect Memory Times Scale Plus Index Register through a narrow oop
// register: [reg + lreg << scale]. Only when CompressedOops::shift() is 0.
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0);
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(10);

  format %{ "[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
5966
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// through a narrow oop register: [reg + (lreg << scale) + off].
// Only when CompressedOops::shift() is 0.
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0);
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  op_cost(10);

  format %{ "[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}
5983
// Indirect Memory Plus Positive Index Register Plus Offset Operand through a
// narrow oop register: [reg + idx + off], idx widened via ConvI2L
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx) %{
  // Requires a compressed-oop shift of 0, and the long type reached via
  // n->in(2)->in(3) (presumably the ConvI2L node) to have a non-negative
  // lower bound.
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
  op_cost(10);

  format %{ "[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
6000
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset
// Operand through a narrow oop register: [reg + (idx << scale) + off],
// idx widened via ConvI2L
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale) %{
  // Requires a compressed-oop shift of 0, and the long type reached via
  // n->in(2)->in(3)->in(1) (presumably the ConvI2L node) to have a
  // non-negative lower bound.
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
  op_cost(10);

  format %{ "[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
6017
6018 //----------Special Memory Operands--------------------------------------------
6019 // Stack Slot Operand - This operand is used for loading and storing temporary
6020 // values on the stack where a match requires a value to
6021 // flow through memory.
// Pointer stack slot: RSP-relative, displacement taken from $reg
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6035
// Int stack slot: RSP-relative, displacement taken from $reg
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6049
// Float stack slot: RSP-relative, displacement taken from $reg
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6063
// Double stack slot: RSP-relative, displacement taken from $reg
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
// Long stack slot: RSP-relative, displacement taken from $reg
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6090
6091 //----------Conditional Branch Operands----------------------------------------
6092 // Comparison Op - This is the operation of the comparison, and is limited to
6093 // the following set of codes:
6094 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6095 //
6096 // Other attributes of the comparison, such as unsignedness, are specified
6097 // by the comparison instruction that sets a condition code flags register.
6098 // That result is represented by a flags operand whose subtype is appropriate
6099 // to the unsignedness (etc.) of the comparison.
6100 //
6101 // Later, the instruction which matches both the Comparison Op (a Bool) and
6102 // the flags (produced by the Cmp) specifies the coding of the comparison op
6103 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6104
// Comparison Code: maps each Bool test to a condition-code value and the
// mnemonic suffix used when printing (l/ge/le/g for the ordering tests).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xc, "l");
    greater_equal(0xd, "ge");
    less_equal(0xe, "le");
    greater(0xf, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6122
// Comparison Code, unsigned compare (b/ae/be/a for the ordering tests).
// Also used by FP, with C2 (unordered) already turned into GT or LT; the
// other bits C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6142
6143
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set so we
// don't need to use cmpOpUCF2 for eq/ne.
// Not used when the UseAPX + AVX10.2 combination is in effect.
operand cmpOpUCF()
%{
  predicate(!(UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt ||
             n->in(1)->in(1) == n->in(1)->in(2)));
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0xb, "np");
    not_equal(0xa, "p");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6167
6168
// Floating comparisons that can be fixed up with extra conditional jumps.
// Covers eq/ne tests whose two compare inputs are different nodes, and is
// not used when the UseAPX + AVX10.2 combination is in effect.
operand cmpOpUCF2()
%{
  predicate(!(UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq) &&
            n->in(1)->in(1) != n->in(1)->in(2));
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6188
6189
// Floating point comparisons that set condition flags to test more directly,
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing into an
// instruction because the preceding comparison might be based on a three way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
// Only used when UseAPX is set and AVX10.2 is supported.
// NOTE(review): the text above says signed tests are used for L/LE, but the
// encodings below for less/less_equal are the unsigned "b"/"be" ones --
// confirm which of the two is intended.
operand cmpOpUCFE() %{
  predicate((UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq ||
             n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt));
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6219
// Operands for bound floating point register arguments
// VecX operand allocated in xmm0_reg
operand rxmm0()
%{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
6227
6228 // Vectors
6229
// Dummy generic vector class; should be used for all vector operands.
// It is replaced with vec[SDXYZ] during the post-selection pass.
operand vec()
%{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
6243
// Dummy generic legacy vector class; should be used for all legacy vector
// operands. It is replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand legVec()
%{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
6259
// VecS operand in vectors_reg_vlbwdq.
// Replaces vec during post-selection cleanup. See above.
operand vecS()
%{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
6268
// VecS operand in vectors_reg_legacy.
// Replaces legVec during post-selection cleanup. See above.
operand legVecS()
%{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
6277
// VecD operand in vectord_reg_vlbwdq.
// Replaces vec during post-selection cleanup. See above.
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
6286
// VecD operand in vectord_reg_legacy.
// Replaces legVec during post-selection cleanup. See above.
operand legVecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
6295
// VecX operand in vectorx_reg_vlbwdq.
// Replaces vec during post-selection cleanup. See above.
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
6304
// VecX operand in vectorx_reg_legacy.
// Replaces legVec during post-selection cleanup. See above.
operand legVecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
6313
// VecY operand in vectory_reg_vlbwdq.
// Replaces vec during post-selection cleanup. See above.
operand vecY()
%{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
6322
// VecY operand in vectory_reg_legacy.
// Replaces legVec during post-selection cleanup. See above.
operand legVecY()
%{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
6331
// VecZ operand in vectorz_reg.
// Replaces vec during post-selection cleanup. See above.
operand vecZ()
%{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6340
// VecZ operand in vectorz_reg_legacy.
// Replaces legVec during post-selection cleanup. See above.
operand legVecZ()
%{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6349
6350 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.
6356
// All memory addressing operands: the plain pointer-based forms, the
// compressed-oop forms, and the narrow-oop-register forms.
opclass memory(indirect,
               indOffset8, indOffset32,
               indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale,
               indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow,
               indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow,
               indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6363
6364 //----------PIPELINE-----------------------------------------------------------
6365 // Rules which define the behavior of the target architectures pipeline.
6366 pipeline %{
6367
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction size unit is 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6376
6377 //----------RESOURCES----------------------------------------------------------
6378 // Resources are the functional units available to the machine
6379
6380 // Generic P2/P3 pipeline
6381 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6382 // 3 instructions decoded per cycle.
6383 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6384 // 3 ALU op, only ALU0 handles mul instructions.
6385 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6386 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6387 BR, FPU,
6388 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6389
6390 //----------PIPELINE DESCRIPTION-----------------------------------------------
6391 // Pipeline Description specifies the stages in the machine's pipeline
6392
6393 // Generic P2/P3 pipeline
6394 pipe_desc(S0, S1, S2, S3, S4, S5);
6395
6396 //----------PIPELINE CLASSES---------------------------------------------------
6397 // Pipeline Classes describe the stages in which input and output are
6398 // referenced by the hardware pipeline.
6399
6400 // Naming convention: ialu or fpu
6401 // Then: _reg
6402 // Then: _reg if there is a 2nd register
6403 // Then: _long if it's a pair of instructions implementing a long
6404 // Then: _fat if it requires the big decoder
6405 // Or: _mem if it requires the big decoder and a memory unit.
6406
6407 // Integer ALU reg operation
6408 pipe_class ialu_reg(rRegI dst)
6409 %{
6410 single_instruction;
6411 dst : S4(write);
6412 dst : S3(read);
6413 DECODE : S0; // any decoder
6414 ALU : S3; // any alu
6415 %}
6416
6417 // Long ALU reg operation
6418 pipe_class ialu_reg_long(rRegL dst)
6419 %{
6420 instruction_count(2);
6421 dst : S4(write);
6422 dst : S3(read);
6423 DECODE : S0(2); // any 2 decoders
6424 ALU : S3(2); // both alus
6425 %}
6426
6427 // Integer ALU reg operation using big decoder
6428 pipe_class ialu_reg_fat(rRegI dst)
6429 %{
6430 single_instruction;
6431 dst : S4(write);
6432 dst : S3(read);
6433 D0 : S0; // big decoder only
6434 ALU : S3; // any alu
6435 %}
6436
6437 // Integer ALU reg-reg operation
6438 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6439 %{
6440 single_instruction;
6441 dst : S4(write);
6442 src : S3(read);
6443 DECODE : S0; // any decoder
6444 ALU : S3; // any alu
6445 %}
6446
6447 // Integer ALU reg-reg operation
6448 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6449 %{
6450 single_instruction;
6451 dst : S4(write);
6452 src : S3(read);
6453 D0 : S0; // big decoder only
6454 ALU : S3; // any alu
6455 %}
6456
6457 // Integer ALU reg-mem operation
6458 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6459 %{
6460 single_instruction;
6461 dst : S5(write);
6462 mem : S3(read);
6463 D0 : S0; // big decoder only
6464 ALU : S4; // any alu
6465 MEM : S3; // any mem
6466 %}
6467
6468 // Integer mem operation (prefetch)
6469 pipe_class ialu_mem(memory mem)
6470 %{
6471 single_instruction;
6472 mem : S3(read);
6473 D0 : S0; // big decoder only
6474 MEM : S3; // any mem
6475 %}
6476
6477 // Integer Store to Memory
6478 pipe_class ialu_mem_reg(memory mem, rRegI src)
6479 %{
6480 single_instruction;
6481 mem : S3(read);
6482 src : S5(read);
6483 D0 : S0; // big decoder only
6484 ALU : S4; // any alu
6485 MEM : S3;
6486 %}
6487
6488 // // Long Store to Memory
6489 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6490 // %{
6491 // instruction_count(2);
6492 // mem : S3(read);
6493 // src : S5(read);
6494 // D0 : S0(2); // big decoder only; twice
6495 // ALU : S4(2); // any 2 alus
6496 // MEM : S3(2); // Both mems
6497 // %}
6498
6499 // Integer Store to Memory
6500 pipe_class ialu_mem_imm(memory mem)
6501 %{
6502 single_instruction;
6503 mem : S3(read);
6504 D0 : S0; // big decoder only
6505 ALU : S4; // any alu
6506 MEM : S3;
6507 %}
6508
6509 // Integer ALU0 reg-reg operation
6510 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6511 %{
6512 single_instruction;
6513 dst : S4(write);
6514 src : S3(read);
6515 D0 : S0; // Big decoder only
6516 ALU0 : S3; // only alu0
6517 %}
6518
6519 // Integer ALU0 reg-mem operation
6520 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6521 %{
6522 single_instruction;
6523 dst : S5(write);
6524 mem : S3(read);
6525 D0 : S0; // big decoder only
6526 ALU0 : S4; // ALU0 only
6527 MEM : S3; // any mem
6528 %}
6529
6530 // Integer ALU reg-reg operation
6531 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6532 %{
6533 single_instruction;
6534 cr : S4(write);
6535 src1 : S3(read);
6536 src2 : S3(read);
6537 DECODE : S0; // any decoder
6538 ALU : S3; // any alu
6539 %}
6540
6541 // Integer ALU reg-imm operation
6542 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6543 %{
6544 single_instruction;
6545 cr : S4(write);
6546 src1 : S3(read);
6547 DECODE : S0; // any decoder
6548 ALU : S3; // any alu
6549 %}
6550
6551 // Integer ALU reg-mem operation
6552 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6553 %{
6554 single_instruction;
6555 cr : S4(write);
6556 src1 : S3(read);
6557 src2 : S3(read);
6558 D0 : S0; // big decoder only
6559 ALU : S4; // any alu
6560 MEM : S3;
6561 %}
6562
6563 // Conditional move reg-reg
6564 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6565 %{
6566 instruction_count(4);
6567 y : S4(read);
6568 q : S3(read);
6569 p : S3(read);
6570 DECODE : S0(4); // any decoder
6571 %}
6572
6573 // Conditional move reg-reg
6574 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6575 %{
6576 single_instruction;
6577 dst : S4(write);
6578 src : S3(read);
6579 cr : S3(read);
6580 DECODE : S0; // any decoder
6581 %}
6582
6583 // Conditional move reg-mem
6584 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6585 %{
6586 single_instruction;
6587 dst : S4(write);
6588 src : S3(read);
6589 cr : S3(read);
6590 DECODE : S0; // any decoder
6591 MEM : S3;
6592 %}
6593
6594 // Conditional move reg-reg long
6595 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6596 %{
6597 single_instruction;
6598 dst : S4(write);
6599 src : S3(read);
6600 cr : S3(read);
6601 DECODE : S0(2); // any 2 decoders
6602 %}
6603
6604 // Float reg-reg operation
6605 pipe_class fpu_reg(regD dst)
6606 %{
6607 instruction_count(2);
6608 dst : S3(read);
6609 DECODE : S0(2); // any 2 decoders
6610 FPU : S3;
6611 %}
6612
6613 // Float reg-reg operation
6614 pipe_class fpu_reg_reg(regD dst, regD src)
6615 %{
6616 instruction_count(2);
6617 dst : S4(write);
6618 src : S3(read);
6619 DECODE : S0(2); // any 2 decoders
6620 FPU : S3;
6621 %}
6622
6623 // Float reg-reg operation
6624 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6625 %{
6626 instruction_count(3);
6627 dst : S4(write);
6628 src1 : S3(read);
6629 src2 : S3(read);
6630 DECODE : S0(3); // any 3 decoders
6631 FPU : S3(2);
6632 %}
6633
6634 // Float reg-reg operation
6635 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6636 %{
6637 instruction_count(4);
6638 dst : S4(write);
6639 src1 : S3(read);
6640 src2 : S3(read);
6641 src3 : S3(read);
6642 DECODE : S0(4); // any 3 decoders
6643 FPU : S3(2);
6644 %}
6645
6646 // Float reg-reg operation
6647 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6648 %{
6649 instruction_count(4);
6650 dst : S4(write);
6651 src1 : S3(read);
6652 src2 : S3(read);
6653 src3 : S3(read);
6654 DECODE : S1(3); // any 3 decoders
6655 D0 : S0; // Big decoder only
6656 FPU : S3(2);
6657 MEM : S3;
6658 %}
6659
6660 // Float reg-mem operation
6661 pipe_class fpu_reg_mem(regD dst, memory mem)
6662 %{
6663 instruction_count(2);
6664 dst : S5(write);
6665 mem : S3(read);
6666 D0 : S0; // big decoder only
6667 DECODE : S1; // any decoder for FPU POP
6668 FPU : S4;
6669 MEM : S3; // any mem
6670 %}
6671
6672 // Float reg-mem operation
6673 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6674 %{
6675 instruction_count(3);
6676 dst : S5(write);
6677 src1 : S3(read);
6678 mem : S3(read);
6679 D0 : S0; // big decoder only
6680 DECODE : S1(2); // any decoder for FPU POP
6681 FPU : S4;
6682 MEM : S3; // any mem
6683 %}
6684
6685 // Float mem-reg operation
6686 pipe_class fpu_mem_reg(memory mem, regD src)
6687 %{
6688 instruction_count(2);
6689 src : S5(read);
6690 mem : S3(read);
6691 DECODE : S0; // any decoder for FPU PUSH
6692 D0 : S1; // big decoder only
6693 FPU : S4;
6694 MEM : S3; // any mem
6695 %}
6696
6697 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6698 %{
6699 instruction_count(3);
6700 src1 : S3(read);
6701 src2 : S3(read);
6702 mem : S3(read);
6703 DECODE : S0(2); // any decoder for FPU PUSH
6704 D0 : S1; // big decoder only
6705 FPU : S4;
6706 MEM : S3; // any mem
6707 %}
6708
6709 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6710 %{
6711 instruction_count(3);
6712 src1 : S3(read);
6713 src2 : S3(read);
6714 mem : S4(read);
6715 DECODE : S0; // any decoder for FPU PUSH
6716 D0 : S0(2); // big decoder only
6717 FPU : S4;
6718 MEM : S3(2); // any mem
6719 %}
6720
6721 pipe_class fpu_mem_mem(memory dst, memory src1)
6722 %{
6723 instruction_count(2);
6724 src1 : S3(read);
6725 dst : S4(read);
6726 D0 : S0(2); // big decoder only
6727 MEM : S3(2); // any mem
6728 %}
6729
6730 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6731 %{
6732 instruction_count(3);
6733 src1 : S3(read);
6734 src2 : S3(read);
6735 dst : S4(read);
6736 D0 : S0(3); // big decoder only
6737 FPU : S4;
6738 MEM : S3(3); // any mem
6739 %}
6740
6741 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6742 %{
6743 instruction_count(3);
6744 src1 : S4(read);
6745 mem : S4(read);
6746 DECODE : S0; // any decoder for FPU PUSH
6747 D0 : S0(2); // big decoder only
6748 FPU : S4;
6749 MEM : S3(2); // any mem
6750 %}
6751
6752 // Float load constant
6753 pipe_class fpu_reg_con(regD dst)
6754 %{
6755 instruction_count(2);
6756 dst : S5(write);
6757 D0 : S0; // big decoder only for the load
6758 DECODE : S1; // any decoder for FPU POP
6759 FPU : S4;
6760 MEM : S3; // any mem
6761 %}
6762
6763 // Float load constant
6764 pipe_class fpu_reg_reg_con(regD dst, regD src)
6765 %{
6766 instruction_count(3);
6767 dst : S5(write);
6768 src : S3(read);
6769 D0 : S0; // big decoder only for the load
6770 DECODE : S1(2); // any decoder for FPU POP
6771 FPU : S4;
6772 MEM : S3; // any mem
6773 %}
6774
6775 // UnConditional branch
6776 pipe_class pipe_jmp(label labl)
6777 %{
6778 single_instruction;
6779 BR : S3;
6780 %}
6781
6782 // Conditional branch
6783 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6784 %{
6785 single_instruction;
6786 cr : S1(read);
6787 BR : S3;
6788 %}
6789
6790 // Allocation idiom
6791 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6792 %{
6793 instruction_count(1); force_serialization;
6794 fixed_latency(6);
6795 heap_ptr : S3(read);
6796 DECODE : S0(3);
6797 D0 : S2;
6798 MEM : S3;
6799 ALU : S3(2);
6800 dst : S5(write);
6801 BR : S5;
6802 %}
6803
6804 // Generic big/slow expanded idiom
6805 pipe_class pipe_slow()
6806 %{
6807 instruction_count(10); multiple_bundles; force_serialization;
6808 fixed_latency(100);
6809 D0 : S0(2);
6810 MEM : S3(2);
6811 %}
6812
6813 // The real do-nothing guy
6814 pipe_class empty()
6815 %{
6816 instruction_count(0);
6817 %}
6818
6819 // Define the class for the Nop node
6820 define
6821 %{
6822 MachNop = empty;
6823 %}
6824
6825 %}
6826
6827 //----------INSTRUCTIONS-------------------------------------------------------
6828 //
6829 // match -- States which machine-independent subtree may be replaced
6830 // by this instruction.
6831 // ins_cost -- The estimated cost of this instruction is used by instruction
6832 // selection to identify a minimum cost tree of machine
6833 // instructions that matches a tree of machine-independent
6834 // instructions.
6835 // format -- A string providing the disassembly for this instruction.
6836 // The value of an instruction's operand may be inserted
6837 // by referring to it with a '$' prefix.
6838 // opcode -- Three instruction opcodes may be provided. These are referred
6839 // to within an encode class as $primary, $secondary, and $tertiary
//               respectively. The primary opcode is commonly used to
6841 // indicate the type of machine instruction, while secondary
6842 // and tertiary are often used for prefix options or addressing
6843 // modes.
6844 // ins_encode -- A list of encode classes with parameters. The encode class
6845 // name must have been defined in an 'enc_class' specification
6846 // in the encode section of the architecture description.
6847
6848 // ============================================================================
6849
// Halt node. When the node is reachable, a stop carrying the halt reason
// string is emitted; otherwise nothing is emitted.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      __ stop(__ code_string(_halt_reason));
    }
  %}
  ins_pipe(pipe_slow);
%}
6861
6862 // ============================================================================
6863
6864 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Dummy float move regF -> vlRegF.
// The encoding is ShouldNotReachHere(), so this node is not expected to be emitted.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6874
// Dummy float move regF -> legRegF.
// The encoding is ShouldNotReachHere(), so this node is not expected to be emitted.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6884
// Dummy float move vlRegF -> regF.
// The encoding is ShouldNotReachHere(), so this node is not expected to be emitted.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6894
// Dummy float move legRegF -> regF.
// The encoding is ShouldNotReachHere(), so this node is not expected to be emitted.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6904
// Dummy double move regD -> vlRegD.
// The encoding is ShouldNotReachHere(), so this node is not expected to be emitted.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6914
// Dummy double move regD -> legRegD.
// The encoding is ShouldNotReachHere(), so this node is not expected to be emitted.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6924
// Dummy double move vlRegD -> regD.
// The encoding is ShouldNotReachHere(), so this node is not expected to be emitted.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6934
// Dummy double move legRegD -> regD.
// The encoding is ShouldNotReachHere(), so this node is not expected to be emitted.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6944
6945 //----------Load/Store/Move Instructions---------------------------------------
6946 //----------Load Instructions--------------------------------------------------
6947
// Load Byte (8 bit signed)
// Matches LoadB and emits a single movsbl from $mem into $dst.
instruct loadB(rRegI dst, memory mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6962
// Load Byte (8 bit signed) into Long Register
// Folds ConvI2L(LoadB) into a single movsbq from $mem.
instruct loadB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(125);
  format %{ "movsbq $dst, $mem\t# byte -> long" %}

  ins_encode %{
    __ movsbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6977
// Load Unsigned Byte (8 bit UNsigned)
// Matches LoadUB and emits a single movzbl from $mem into $dst.
instruct loadUB(rRegI dst, memory mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "movzbl $dst, $mem\t# ubyte" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6992
// Load Unsigned Byte (8 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUB) into a single movzbq from $mem.
instruct loadUB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(125);
  format %{ "movzbq $dst, $mem\t# ubyte -> long" %}

  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7007
// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
// The byte is loaded with movzbq, then and-ed with the mask truncated to its
// low 8 bits (right_n_bits(8)). The andl is why the flags register is killed.
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
    __ andl($dst$$Register, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
7022
// Load Short (16 bit signed)
// Matches LoadS and emits a single movswl from $mem into $dst.
instruct loadS(rRegI dst, memory mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7037
// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches (LoadS << 24) >> 24, which keeps only the sign-extended low byte,
// and replaces the whole pattern with one movsbl from $mem.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7049
// Load Short (16 bit signed) into Long Register
// Folds ConvI2L(LoadS) into a single movswq from $mem.
instruct loadS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(125);
  format %{ "movswq $dst, $mem\t# short -> long" %}

  ins_encode %{
    __ movswq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7064
// Load Unsigned Short/Char (16 bit UNsigned)
// Matches LoadUS and emits a single movzwl from $mem into $dst.
instruct loadUS(rRegI dst, memory mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "movzwl $dst, $mem\t# ushort/char" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7079
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches (LoadUS << 24) >> 24, which keeps only the sign-extended low byte,
// and replaces the whole pattern with one movsbl from $mem.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7091
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUS) into a single movzwq from $mem.
instruct loadUS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(125);
  format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}

  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7106
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Only the low byte survives the mask, so a single movzbq from $mem is emitted
// and no separate and instruction is needed.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7117
// Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
// The value is loaded with movzwq, then and-ed with the mask truncated to its
// low 16 bits (right_n_bits(16)). The andl is why the flags register is killed.
instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 16)" %}
  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
    __ andl($dst$$Register, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
7132
// Load Integer
// Matches LoadI and emits a single movl from $mem into $dst.
instruct loadI(rRegI dst, memory mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "movl $dst, $mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7147
// Load Integer (32 bit signed) to Byte (8 bit signed)
// Matches (LoadI << 24) >> 24, which keeps only the sign-extended low byte,
// and replaces the whole pattern with one movsbl from $mem.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7159
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// Matches LoadI & immI_255 and replaces it with one movzbl from $mem.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7171
// Load Integer (32 bit signed) to Short (16 bit signed)
// Matches (LoadI << 16) >> 16, which keeps only the sign-extended low 16 bits,
// and replaces the whole pattern with one movswl from $mem.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7183
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// Matches LoadI & immI_65535 and replaces it with one movzwl from $mem.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7195
// Load Integer into Long Register
// Folds ConvI2L(LoadI) into a single movslq from $mem.
instruct loadI2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));

  ins_cost(125);
  format %{ "movslq $dst, $mem\t# int -> long" %}

  ins_encode %{
    __ movslq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7210
// Load Integer with mask 0xFF into Long Register
// Only the low byte survives the mask, so a single movzbq from $mem is emitted.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7221
// Load Integer with mask 0xFFFF into Long Register
// Only the low 16 bits survive the mask, so a single movzwq from $mem is emitted.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7232
// Load Integer with a 31-bit mask into Long Register
// Emits movl followed by andl with the mask; the andl is why the flags
// register is killed.
// NOTE(review): presumably immU31 guarantees a clear sign bit, which is what
// lets the zero-extending movl stand in for ConvI2L - confirm against the
// immU31 operand definition.
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
            "andl $dst, $mask" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ andl($dst$$Register, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
7247
// Load Unsigned Integer into Long Register
// Matches ConvI2L(LoadI) & immL_32bits and replaces the whole pattern with a
// single movl from $mem (a 32-bit movl zero-extends into the 64-bit register).
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(125);
  format %{ "movl $dst, $mem\t# uint -> long" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7262
// Load Long
// Matches LoadL and emits a single movq from $mem into $dst.
instruct loadL(rRegL dst, memory mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(125);
  format %{ "movq $dst, $mem\t# long" %}

  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem); // XXX
%}
7277
// Load Range
// Matches LoadRange and emits a 32-bit movl from $mem into $dst.
instruct loadRange(rRegI dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# range" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7290
// Load Pointer
// Only selected for LoadP nodes whose barrier_data() is 0; emits a plain movq.
instruct loadP(rRegP dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(125); // XXX
  format %{ "movq $dst, $mem\t# ptr" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7304
// Load Compressed Pointer
// Only selected for LoadN nodes whose barrier_data() is 0; emits a 32-bit movl.
instruct loadN(rRegN dst, memory mem)
%{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadN mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7318
7319
// Load Klass Pointer
// Matches LoadKlass and emits a single movq from $mem into $dst.
instruct loadKlass(rRegP dst, memory mem)
%{
  match(Set dst (LoadKlass mem));

  ins_cost(125); // XXX
  format %{ "movq $dst, $mem\t# class" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7332
// Load narrow Klass Pointer
// Selected only when UseCompactObjectHeaders is off; emits a 32-bit movl.
// loadNKlassCompactHeaders covers the UseCompactObjectHeaders case.
instruct loadNKlass(rRegN dst, memory mem)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7346
// Load narrow Klass Pointer when UseCompactObjectHeaders is on.
// Loads 32 bits from $mem and shifts them right by
// markWord::klass_shift_at_offset; the shrl is why the flags register is killed.
instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
%{
  predicate(UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));
  effect(KILL cr);
  ins_cost(125);
  format %{
    "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
    "shrl $dst, markWord::klass_shift_at_offset"
  %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ shrl($dst$$Register, markWord::klass_shift_at_offset);
  %}
  ins_pipe(ialu_reg_mem);
%}
7363
// Load Float
// Matches LoadF and emits the load through the movflt macro-assembler helper.
instruct loadF(regF dst, memory mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(145); // XXX
  format %{ "movss $dst, $mem\t# float" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7376
// Load Double
// Variant selected when UseXmmLoadAndClearUpper is off. The encoding goes
// through the same movdbl helper as loadD; only the predicate and the printed
// mnemonic (movlpd) differ.
instruct loadD_partial(regD dst, memory mem)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7390
// Load Double
// Variant selected when UseXmmLoadAndClearUpper is on; emits the load through
// the movdbl macro-assembler helper.
instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7403
// Materialize an AOT Runtime Constants address constant into a pointer register.
// The constant is cast to an address and handed to load_aotrc_address.
instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
%{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}

  ins_encode %{
    __ load_aotrc_address($dst$$Register, (address)$con$$constant);
  %}

  ins_pipe(ialu_reg_fat);
%}
7416
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
// AVX10.2 form for non-reduction nodes: the ideal opcode (MinF or MaxF) is
// passed straight to sminmax_fp_avx10_2.
instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
%{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));

  format %{ "minmaxF $dst, $a, $b" %}
  ins_encode %{
    __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_FLOAT, $dst$$XMMRegister, k0,
                          $a$$XMMRegister, $b$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7432
// Float min/max for reduction nodes when AVX10.2 is available.
// Delegates to emit_fp_min_max, using a general-purpose TEMP register and
// killing the flags register.
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // true selects min, false selects max
    const bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7449
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
// AVX form (no AVX10.2) for non-reduction nodes. The scalar ideal opcode is
// mapped to Op_MinV or Op_MaxV and handed to vminmax_fp at 128-bit vector
// length, with three XMM temporaries.
instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);

  format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    const int vector_opcode = (this->ideal_Opcode() == Op_MinF) ? Op_MinV : Op_MaxV;
    __ vminmax_fp(vector_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
7468
// Float min/max for reduction nodes on AVX without AVX10.2.
// Delegates to emit_fp_min_max, using a general-purpose TEMP register and
// killing the flags register.
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
  ins_encode %{
    // true selects min, false selects max
    const bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7485
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
// AVX10.2 form for non-reduction nodes: the ideal opcode (MinD or MaxD) is
// passed straight to sminmax_fp_avx10_2.
instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
%{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));

  format %{ "minmaxD $dst, $a, $b" %}
  ins_encode %{
    __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_DOUBLE, $dst$$XMMRegister, k0,
                          $a$$XMMRegister, $b$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7501
// Double min/max for reduction nodes when AVX10.2 is available.
// Delegates to emit_fp_min_max, using a general-purpose TEMP register and
// killing the flags register.
// NOTE(review): rtmp is declared rRegI here, while minmaxD_reduction_reg
// declares it rRegL for the same fp_prec_dbl call - confirm which operand
// class emit_fp_min_max needs for doubles.
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // true selects min, false selects max
    const bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7518
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
// AVX (non-AVX10.2) variant for nodes that are not reductions; uses three XMM temps.
instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);

  format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    // vminmax_fp is driven by the vector opcodes, so translate the scalar one.
    int vector_opcode;
    if (this->ideal_Opcode() == Op_MinD) {
      vector_opcode = Op_MinV;
    } else {
      vector_opcode = Op_MaxV;
    }
    __ vminmax_fp(vector_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
7537
// Double MinD/MaxD rule selected when AVX10.2 is absent, UseAVX > 0 and
// VLoopReductions::is_reduction(n) holds. Needs one long TEMP and kills flags.
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // One rule serves both ideal opcodes; derive the direction from the matched node.
    const bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7554
// Load Effective Address
// Materializes an indOffset8 address expression into a pointer register via leaq.
instruct leaP8(rRegP dst, indOffset8 mem)
%{
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr 8" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7567
// Materializes an indOffset32 address expression into a pointer register via leaq.
instruct leaP32(rRegP dst, indOffset32 mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr 32" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7579
// Materializes an indIndexOffset address expression into a pointer register via leaq.
instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7591
// Materializes an indIndexScale address expression into a pointer register via leaq.
instruct leaPIdxScale(rRegP dst, indIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7603
// Materializes an indPosIndexScale address expression into a pointer register via leaq.
// The format tag is "posidxscale" so that disassembly listings can tell this rule
// apart from leaPIdxScale, in line with the other PosIdx rules' tags.
instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7615
// Materializes an indIndexScaleOffset address expression into a pointer register via leaq.
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7627
// Materializes an indPosIndexOffset address expression into a pointer register via leaq.
instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7639
// Materializes an indPosIndexScaleOffset address expression into a pointer register via leaq.
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7651
// Load Effective Address which uses Narrow (32-bits) oop
// Only enabled with compressed oops and a non-zero CompressedOops::shift().
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7665
// leaq for an indOffset8Narrow address; enabled only when CompressedOops::shift() is zero.
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7678
// leaq for an indOffset32Narrow address; enabled only when CompressedOops::shift() is zero.
instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7691
// leaq for an indIndexOffsetNarrow address; enabled only when CompressedOops::shift() is zero.
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7704
// leaq for an indIndexScaleNarrow address; enabled only when CompressedOops::shift() is zero.
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7717
// leaq for an indIndexScaleOffsetNarrow address; enabled only when CompressedOops::shift() is zero.
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7730
// leaq for an indPosIndexOffsetNarrow address; enabled only when CompressedOops::shift() is zero.
instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7743
// leaq for an indPosIndexScaleOffsetNarrow address; enabled only when CompressedOops::shift() is zero.
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ leaq(Rdst, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7756
// Loads an arbitrary int constant into an int register with movl.
instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7767
// Loads int zero by xor-ing the destination with itself; declared to kill flags.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, Rdst);
  %}
  ins_pipe(ialu_reg);
%}
7780
// Loads an arbitrary long constant into a long register with mov64.
instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ mov64(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7792
// Loads long zero by emitting xorl of the destination with itself; declared to kill flags.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, Rdst);
  %}
  ins_pipe(ialu_reg); // XXX
%}
7805
// Loads an immUL32 long constant into a long register using a 32-bit movl.
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7817
// Loads an immL32 long constant into a long register using movq with an immediate.
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7829
// Loads a pointer constant with mov64, passing along the constant's relocation
// type and the RELOC_IMM64 format.
instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq $dst, $con\t# ptr" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ mov64(Rdst, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7839
// Loads a null pointer by emitting xorl of the destination with itself; declared to kill flags.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# ptr" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, Rdst);
  %}
  ins_pipe(ialu_reg);
%}
7852
// Loads an immP31 pointer constant using a 32-bit movl; the rule declares flags killed.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7865
// Loads a float constant from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movflt(Xdst, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7875
// Loads an immH (half-float) constant from the constant table into an XMM register
// using the same movflt sequence as loadConF.
instruct loadConH(regF dst, immH con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movflt(Xdst, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7885
// Loads a compressed null pointer by xor-ing the destination with itself;
// declared to kill flags. The format prints "$dst, $dst" so the listing
// matches the instruction that is actually emitted.
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7895
// Loads a non-null compressed oop constant via set_narrow_oop.
// A null constant is treated as unreachable in this rule.
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7911
// Loads a non-null compressed klass constant via set_narrow_klass.
// A null constant is treated as unreachable in this rule.
instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address klass_con = (address)$src$$constant;
    if (klass_con != nullptr) {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7927
// Loads float 0.0 by xor-ing the XMM destination with itself (xorps).
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps $dst, $dst\t# float 0.0" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ xorps(Xdst, Xdst);
  %}
  ins_pipe(pipe_slow);
%}
7939
// Use the same format since predicate() can not be used here.
// Loads a double constant from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movdbl(Xdst, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7950
// Loads double 0.0 by xor-ing the XMM destination with itself (xorpd).
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd $dst, $dst\t# double 0.0" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ xorpd(Xdst, Xdst);
  %}
  ins_pipe(pipe_slow);
%}
7962
// Reloads an int from a stack slot into an int register.
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7974
// Reloads a long from a stack slot into a long register.
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7986
// Reloads a pointer from a stack slot into a pointer register.
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7998
// Reloads a float from a stack slot; the slot is addressed as rsp + displacement.
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8010
// Use the same format since predicate() can not be used here.
// Reloads a double from a stack slot; the slot is addressed as rsp + displacement.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8023
8024 // Prefetch instructions for allocation.
8025 // Must be safe to execute with invalid address (cannot fault).
8026
// Allocation prefetch using PREFETCHW; selected when AllocatePrefetchInstr is 3.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr == 3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    Address target = $mem$$Address;
    __ prefetchw(target);
  %}
  ins_pipe(ialu_mem);
%}
8038
// Allocation prefetch using PREFETCHNTA; selected when AllocatePrefetchInstr is 0.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr == 0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    Address target = $mem$$Address;
    __ prefetchnta(target);
  %}
  ins_pipe(ialu_mem);
%}
8050
// Allocation prefetch using PREFETCHT0; selected when AllocatePrefetchInstr is 1.
instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr == 1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    Address target = $mem$$Address;
    __ prefetcht0(target);
  %}
  ins_pipe(ialu_mem);
%}
8062
// Allocation prefetch using PREFETCHT2; selected when AllocatePrefetchInstr is 2.
instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr == 2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    Address target = $mem$$Address;
    __ prefetcht2(target);
  %}
  ins_pipe(ialu_mem);
%}
8074
8075 //----------Store Instructions-------------------------------------------------
8076
// Store Byte
// Writes the low byte of an int register to memory with movb.
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movb($mem$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
8089
// Store Char/Short
// Writes the low 16 bits of an int register to memory with movw.
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw $mem, $src\t# char/short" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movw($mem$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
8102
// Store Integer
// Writes an int register to memory with movl.
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movl($mem$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
8115
// Store Long
// Writes a long register to memory with movq.
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movq($mem$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}
8128
// Store Pointer
// Plain pointer store; only matches stores whose barrier_data() is zero.
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movq($mem$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
8142
// Stores a null pointer by writing r12. Restricted to compressed oops with a
// null CompressedOops::base() (the format notes R12_heapbase==0) and to stores
// whose barrier_data() is zero.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8155
// Store Null Pointer, mark word, or other simple pointer constant.
// Takes an immP31 immediate; only matches stores whose barrier_data() is zero.
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8169
// Store Compressed Pointer
// 32-bit store of a narrow oop register; only matches stores whose barrier_data() is zero.
instruct storeN(memory mem, rRegN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movl($mem$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
8183
// 32-bit store of a compressed klass pointer held in a narrow register.
instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movl($mem$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
8195
// Stores a compressed null by writing the low 32 bits of r12. Restricted to a
// null CompressedOops::base() (the format notes R12_heapbase==0) and to stores
// whose barrier_data() is zero.
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movl(dest, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8208
// Stores a compressed oop constant. A non-null constant goes through
// set_narrow_oop; a null constant is written as a literal 0.
// Only matches stores whose barrier_data() is zero.
instruct storeImmN(memory mem, immN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    } else {
      __ movl($mem$$Address, 0);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}
8226
// Stores a compressed klass constant to memory via set_narrow_klass.
instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ set_narrow_klass(dest, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8238
// Store Integer Immediate
// Zero case: writes the low 32 bits of r12. Restricted to compressed oops with
// a null CompressedOops::base() (the format notes R12_heapbase==0).
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movl(dest, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8252
// Stores an int immediate directly to memory with movl.
instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movl(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8264
// Store Long Immediate
// Zero case: writes r12 with movq. Restricted to compressed oops with a null
// CompressedOops::base() (the format notes R12_heapbase==0).
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8278
// Stores an immL32 long immediate directly to memory with movq.
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8290
// Store Short/Char Immediate
// Zero case: writes the low 16 bits of r12. Restricted to compressed oops with
// a null CompressedOops::base() (the format notes R12_heapbase==0).
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movw(dest, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8304
// Stores a 16-bit immediate directly to memory with movw; gated by UseStoreImmI16.
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw $mem, $src\t# short/char" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movw(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8317
// Store Byte Immediate
// Zero case: writes the low byte of r12. Restricted to compressed oops with a
// null CompressedOops::base() (the format notes R12_heapbase==0).
// The format annotation reads "byte" to match the StoreB node this rule handles.
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8331
// Stores an 8-bit immediate directly to memory with movb.
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movb(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8343
// Store Float
// Writes a float from an XMM register to memory via movflt.
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss $mem, $src\t# float" %}
  ins_encode %{
    XMMRegister Xsrc = $src$$XMMRegister;
    __ movflt($mem$$Address, Xsrc);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8356
// Store immediate Float value (it is faster than store from XMM register)
// Zero case: writes the low 32 bits of r12. Restricted to compressed oops with
// a null CompressedOops::base() (the format notes R12_heapbase==0).
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movl(dest, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8370
// Stores a float constant as a 32-bit integer immediate; jint_cast supplies
// the integer value that is passed to movl.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl $mem, $src\t# float" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movl(dest, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
8382
// Store Double
// Writes a double from an XMM register to memory via movdbl.
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd $mem, $src\t# double" %}
  ins_encode %{
    XMMRegister Xsrc = $src$$XMMRegister;
    __ movdbl($mem$$Address, Xsrc);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8395
// Store immediate double 0.0 (it is faster than store from XMM register)
// Used when compressed oops are off or CompressedOops::base() is non-null,
// i.e. the complement of the storeD0 predicate.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq $mem, $src\t# double 0." %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8409
// Stores double 0.0 by writing r12 with movq. Restricted to compressed oops
// with a null CompressedOops::base() (the format notes R12_heapbase==0).
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8422
// Spills an int register to a stack slot.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movl($dst$$Address, Rsrc);
  %}
  ins_pipe( ialu_mem_reg );
%}
8434
// Spills a long register to a stack slot.
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
8446
// Spills a pointer register to a stack slot.
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ movq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
8458
// Spills a float to a stack slot; the slot is addressed as rsp + displacement.
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8470
// Spills a double to a stack slot; the slot is addressed as rsp + displacement.
instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movdbl(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8482
// Cache line write-back for the line at $addr. Available only when the CPU
// supports data cache line flush. The operand must be a bare base register:
// no index and a zero displacement (both asserted).
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    Register Rbase = $addr$$base$$Register;
    __ cache_wb(Address(Rbase, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}
8497
// Emits the cache write-back synchronization in its "pre" form
// (cache_wbsync called with true).
instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    const bool is_pre = true;
    __ cache_wbsync(is_pre);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8510
// Emits the cache write-back synchronization in its "post" form
// (cache_wbsync called with false).
instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    const bool is_pre = false;
    __ cache_wbsync(is_pre);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8523
8524 //----------BSWAP Instructions-------------------------------------------------
// ReverseBytesI: in-place byte swap of a 32-bit register.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ bswapl(Rdst);
  %}
  ins_pipe( ialu_reg );
%}
8534
// ReverseBytesL: in-place byte swap of a 64-bit register.
instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ bswapq(Rdst);
  %}
  ins_pipe( ialu_reg);
%}
8544
// ReverseBytesUS: swap all four bytes, then shift right logically by 16 so the
// swapped 16-bit value ends up zero-extended in the low half. Kills flags.
instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "shrl $dst,16\n\t" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ bswapl(Rdst);
    __ shrl(Rdst, 16);
  %}
  ins_pipe( ialu_reg );
%}
8557
// ReverseBytesS: swap all four bytes, then shift right arithmetically by 16 so
// the swapped 16-bit value ends up sign-extended in the low half. Kills flags.
// The format prints "sarl" to match the instruction that is actually emitted.
instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "sarl $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
8570
8571 //---------- Zeros Count Instructions ------------------------------------------
8572
// CountLeadingZerosI via lzcntl; used when UseCountLeadingZerosInstruction is set. Kills flags.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ lzcntl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
8584
// CountLeadingZerosI with the LoadI folded into lzcntl's memory operand. Kills flags.
instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ lzcntl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8596
// CountLeadingZerosI fallback built on bsrl, used when lzcnt is not enabled.
// The result is (BitsPerInt - 1) - bit_index; when bsrl reports a zero source
// the index is forced to -1 so the result becomes BitsPerInt. Kills flags.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
      "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 31" %}
  ins_encode %{
    Register result = $dst$$Register;
    Label have_index;
    __ bsrl(result, $src$$Register);
    __ jccb(Assembler::notZero, have_index);
    // Zero source: substitute index -1.
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerInt - 1) - index
    __ negl(result);
    __ addl(result, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
8621
// CountLeadingZerosL via lzcntq; used when UseCountLeadingZerosInstruction is set. Kills flags.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ lzcntq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
8633
// CountLeadingZerosL with the LoadL folded into lzcntq's memory operand. Kills flags.
instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ lzcntq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8645
// CountLeadingZerosL fallback built on bsrq, used when lzcnt is not enabled.
// The result is (BitsPerLong - 1) - bit_index; when bsrq reports a zero source
// the index is forced to -1 so the result becomes BitsPerLong. Kills flags.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
      "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 63" %}
  ins_encode %{
    Register result = $dst$$Register;
    Label have_index;
    __ bsrq(result, $src$$Register);
    __ jccb(Assembler::notZero, have_index);
    // Zero source: substitute index -1.
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerLong - 1) - index
    __ negl(result);
    __ addl(result, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
8670
// CountTrailingZerosI via tzcntl; used when UseCountTrailingZerosInstruction is set. Kills flags.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ tzcntl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
8682
// CountTrailingZerosI with the LoadI folded into tzcntl's memory operand. Kills flags.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ tzcntl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8694
// CountTrailingZerosI fallback built on bsfl, used when tzcnt is not enabled.
// When bsfl reports a zero source the result is set to BitsPerInt. Kills flags.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz done\n\t"
            "movl $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    Label have_index;
    __ bsfl(result, input);
    __ jccb(Assembler::notZero, have_index);
    // Zero source: every bit counts as a trailing zero.
    __ movl(result, BitsPerInt);
    __ bind(have_index);
  %}
  ins_pipe(ialu_reg);
%}
8714
// CountTrailingZerosL on a long register source, selected when
// UseCountTrailingZerosInstruction is on. Emits a single tzcntq; the result
// operand is an int register (rRegI). Kills the flags.
8715 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8716 predicate(UseCountTrailingZerosInstruction);
8717 match(Set dst (CountTrailingZerosL src));
8718 effect(KILL cr);
8719
8720 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8721 ins_encode %{
8722 __ tzcntq($dst$$Register, $src$$Register);
8723 %}
8724 ins_pipe(ialu_reg);
8725 %}
8726
// CountTrailingZerosL with the LoadL folded into the instruction: matches
// (CountTrailingZerosL (LoadL src)) and emits tzcntq with a memory operand.
// Selected when UseCountTrailingZerosInstruction is on; cost 175; kills flags.
8727 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8728 predicate(UseCountTrailingZerosInstruction);
8729 match(Set dst (CountTrailingZerosL (LoadL src)));
8730 effect(KILL cr);
8731 ins_cost(175);
8732 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8733 ins_encode %{
8734 __ tzcntq($dst$$Register, $src$$Address);
8735 %}
8736 ins_pipe(ialu_reg_mem);
8737 %}
8738
// CountTrailingZerosL fallback, selected when UseCountTrailingZerosInstruction
// is off. Emits bsfq; if the notZero branch is not taken, dst is overwritten
// with BitsPerLong (shown as 64 in the format string). Kills the flags.
8739 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8740 predicate(!UseCountTrailingZerosInstruction);
8741 match(Set dst (CountTrailingZerosL src));
8742 effect(KILL cr);
8743
8744 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8745 "jnz done\n\t"
8746 "movl $dst, 64\n"
8747 "done:" %}
8748 ins_encode %{
8749 Register Rdst = $dst$$Register;
8750 Label done;
8751 __ bsfq(Rdst, $src$$Register);
// Keep the bsfq result when the flags say "not zero"; otherwise use BitsPerLong.
8752 __ jccb(Assembler::notZero, done);
8753 __ movl(Rdst, BitsPerLong);
8754 __ bind(done);
8755 %}
8756 ins_pipe(ialu_reg);
8757 %}
8758
8759 //--------------- Reverse Operation Instructions ----------------
// ReverseI for CPUs where VM_Version::supports_gfni() is false.
// Delegates to the reverseI macro-assembler routine, passing xnoreg for both
// XMM temporaries and one general-purpose temporary (rtmp).
// dst and rtmp are declared TEMP; the flags register is killed.
8760 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8761 predicate(!VM_Version::supports_gfni());
8762 match(Set dst (ReverseI src));
8763 effect(TEMP dst, TEMP rtmp, KILL cr);
8764 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8765 ins_encode %{
8766 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8767 %}
8768 ins_pipe( ialu_reg );
8769 %}
8770
// ReverseI for CPUs where VM_Version::supports_gfni() is true.
// Delegates to the same reverseI routine, here supplying two XMM temporaries
// (xtmp1, xtmp2) and a long general-purpose temporary (rtmp).
// dst and all temporaries are declared TEMP; the flags register is killed.
8771 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8772 predicate(VM_Version::supports_gfni());
8773 match(Set dst (ReverseI src));
8774 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8775 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8776 ins_encode %{
8777 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8778 %}
8779 ins_pipe( ialu_reg );
8780 %}
8781
// ReverseL for CPUs where VM_Version::supports_gfni() is false.
// Delegates to the reverseL macro-assembler routine, passing xnoreg for both
// XMM temporaries and two general-purpose temporaries (rtmp1, rtmp2).
// dst and both temporaries are declared TEMP; the flags register is killed.
8782 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8783 predicate(!VM_Version::supports_gfni());
8784 match(Set dst (ReverseL src));
8785 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8786 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8787 ins_encode %{
8788 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8789 %}
8790 ins_pipe( ialu_reg );
8791 %}
8792
// ReverseL for CPUs where VM_Version::supports_gfni() is true.
// Delegates to reverseL with two XMM temporaries and a single general-purpose
// temporary; noreg is passed in place of the second general-purpose temporary.
// dst and all temporaries are declared TEMP; the flags register is killed.
8793 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8794 predicate(VM_Version::supports_gfni());
8795 match(Set dst (ReverseL src));
8796 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8797 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8798 ins_encode %{
8799 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8800 %}
8801 ins_pipe( ialu_reg );
8802 %}
8803
8804 //---------- Population Count Instructions -------------------------------------
8805
// PopCountI on a register source, selected when UsePopCountInstruction is on.
// Emits popcntl and declares the flags register killed.
8806 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8807 predicate(UsePopCountInstruction);
8808 match(Set dst (PopCountI src));
8809 effect(KILL cr);
8810
8811 format %{ "popcnt $dst, $src" %}
8812 ins_encode %{
8813 __ popcntl($dst$$Register, $src$$Register);
8814 %}
8815 ins_pipe(ialu_reg);
8816 %}
8817
// PopCountI with the LoadI folded into the instruction: matches
// (PopCountI (LoadI mem)) and emits popcntl with a memory operand.
// Selected when UsePopCountInstruction is on; kills the flags register.
// Scheduled with the register+memory pipeline class, consistent with the
// other memory-operand bit-count rules in this file (e.g. the tzcnt *_mem
// forms), instead of the register-only class.
8818 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8819 predicate(UsePopCountInstruction);
8820 match(Set dst (PopCountI (LoadI mem)));
8821 effect(KILL cr);
8822
8823 format %{ "popcnt $dst, $mem" %}
8824 ins_encode %{
8825 __ popcntl($dst$$Register, $mem$$Address);
8826 %}
8827 ins_pipe(ialu_reg_mem);
8828 %}
8829
8830 // Note: Long.bitCount(long) returns an int.
// PopCountL on a long register source, selected when UsePopCountInstruction
// is on. Emits popcntq; the result operand is an int register (rRegI).
// Kills the flags register.
8831 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8832 predicate(UsePopCountInstruction);
8833 match(Set dst (PopCountL src));
8834 effect(KILL cr);
8835
8836 format %{ "popcnt $dst, $src" %}
8837 ins_encode %{
8838 __ popcntq($dst$$Register, $src$$Register);
8839 %}
8840 ins_pipe(ialu_reg);
8841 %}
8842
8843 // Note: Long.bitCount(long) returns an int.
// PopCountL with the LoadL folded into the instruction: matches
// (PopCountL (LoadL mem)) and emits popcntq with a memory operand.
// Selected when UsePopCountInstruction is on; the result operand is an int
// register (rRegI); kills the flags register.
// Scheduled with the register+memory pipeline class, consistent with the
// other memory-operand bit-count rules in this file (e.g. the tzcnt *_mem
// forms), instead of the register-only class.
8844 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8845 predicate(UsePopCountInstruction);
8846 match(Set dst (PopCountL (LoadL mem)));
8847 effect(KILL cr);
8848
8849 format %{ "popcnt $dst, $mem" %}
8850 ins_encode %{
8851 __ popcntq($dst$$Register, $mem$$Address);
8852 %}
8853 ins_pipe(ialu_reg_mem);
8854 %}
8855
8856
8857 //----------MemBar Instructions-----------------------------------------------
8858 // Memory barrier flavors
8859
// Matches both MemBarAcquire and LoadFence. Emits no code: the encoding is
// empty, the declared size is 0 and the cost is 0.
8860 instruct membar_acquire()
8861 %{
8862 match(MemBarAcquire);
8863 match(LoadFence);
8864 ins_cost(0);
8865
8866 size(0);
8867 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8868 ins_encode();
8869 ins_pipe(empty);
8870 %}
8871
// Matches MemBarAcquireLock. Emits no code (empty encoding, size 0, cost 0);
// per the format text, a prior CMPXCHG in FastLock makes a barrier unnecessary.
8872 instruct membar_acquire_lock()
8873 %{
8874 match(MemBarAcquireLock);
8875 ins_cost(0);
8876
8877 size(0);
8878 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8879 ins_encode();
8880 ins_pipe(empty);
8881 %}
8882
// Matches both MemBarRelease and StoreFence. Emits no code: the encoding is
// empty, the declared size is 0 and the cost is 0.
8883 instruct membar_release()
8884 %{
8885 match(MemBarRelease);
8886 match(StoreFence);
8887 ins_cost(0);
8888
8889 size(0);
8890 format %{ "MEMBAR-release ! (empty encoding)" %}
8891 ins_encode();
8892 ins_pipe(empty);
8893 %}
8894
// Matches MemBarReleaseLock. Emits no code (empty encoding, size 0, cost 0);
// per the format text, a FastUnlock follows so no barrier is needed here.
8895 instruct membar_release_lock()
8896 %{
8897 match(MemBarReleaseLock);
8898 ins_cost(0);
8899
8900 size(0);
8901 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8902 ins_encode();
8903 ins_pipe(empty);
8904 %}
8905
// Matches MemBarStoreLoad and emits a real barrier via
// membar(Assembler::StoreLoad). The format template prints it as a locked
// add of 0 to [rsp + #0]. Kills the flags register; cost 400.
8906 instruct membar_storeload(rFlagsReg cr) %{
8907 match(MemBarStoreLoad);
8908 effect(KILL cr);
8909 ins_cost(400);
8910
8911 format %{
8912 $$template
8913 $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
8914 %}
8915 ins_encode %{
8916 __ membar(Assembler::StoreLoad);
8917 %}
8918 ins_pipe(pipe_slow);
8919 %}
8920
// Matches MemBarVolatile and emits a real barrier via
// membar(Assembler::StoreLoad). The format template prints it as a locked
// add of 0 to [rsp + #0]. Kills the flags register; cost 400.
8921 instruct membar_volatile(rFlagsReg cr) %{
8922 match(MemBarVolatile);
8923 effect(KILL cr);
8924 ins_cost(400);
8925
8926 format %{
8927 $$template
8928 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8929 %}
8930 ins_encode %{
8931 __ membar(Assembler::StoreLoad);
8932 %}
8933 ins_pipe(pipe_slow);
8934 %}
8935
// Zero-cost, zero-size alternative for MemBarVolatile that emits no code.
// Applies only when Matcher::post_store_load_barrier(n) returns true; its
// cost of 0 (versus 400 for membar_volatile) presumably makes the matcher
// prefer it whenever the predicate holds.
8936 instruct unnecessary_membar_volatile()
8937 %{
8938 match(MemBarVolatile);
8939 predicate(Matcher::post_store_load_barrier(n));
8940 ins_cost(0);
8941
8942 size(0);
8943 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8944 ins_encode();
8945 ins_pipe(empty);
8946 %}
8947
// Matches MemBarFull and emits a barrier via membar(Assembler::StoreLoad),
// the same encoding used by membar_volatile and membar_storeload.
// The format template prints it as a locked add of 0 to [rsp + #0].
// Kills the flags register; cost 400.
8948 instruct membar_full(rFlagsReg cr) %{
8949 match(MemBarFull);
8950 effect(KILL cr);
8951 ins_cost(400);
8952
8953 format %{
8954 $$template
8955 $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
8956 %}
8957 ins_encode %{
8958 __ membar(Assembler::StoreLoad);
8959 %}
8960 ins_pipe(pipe_slow);
8961 %}
8962
// Matches both MemBarStoreStore and StoreStoreFence. Emits no code: the
// encoding is empty, the declared size is 0 and the cost is 0.
8963 instruct membar_storestore() %{
8964 match(MemBarStoreStore);
8965 match(StoreStoreFence);
8966 ins_cost(0);
8967
8968 size(0);
8969 format %{ "MEMBAR-storestore (empty encoding)" %}
8970 ins_encode( );
8971 ins_pipe(empty);
8972 %}
8973
8974 //----------Move Instructions--------------------------------------------------
8975
// CastX2P (long -> pointer). A register-to-register movptr that is skipped
// entirely when the allocator assigned dst and src the same register.
8976 instruct castX2P(rRegP dst, rRegL src)
8977 %{
8978 match(Set dst (CastX2P src));
8979
8980 format %{ "movq $dst, $src\t# long->ptr" %}
8981 ins_encode %{
// No instruction is emitted when source and destination coincide.
8982 if ($dst$$reg != $src$$reg) {
8983 __ movptr($dst$$Register, $src$$Register);
8984 }
8985 %}
8986 ins_pipe(ialu_reg_reg); // XXX
8987 %}
8988
// CastI2N (int -> narrow pointer). A 32-bit register-to-register move
// (movl) that is skipped when dst and src were assigned the same register.
// The format string names movl so the printed listing matches the
// instruction the encoding actually emits.
8989 instruct castI2N(rRegN dst, rRegI src)
8990 %{
8991 match(Set dst (CastI2N src));
8992
8993 format %{ "movl $dst, $src\t# int -> narrow ptr" %}
8994 ins_encode %{
// No instruction is emitted when source and destination coincide.
8995 if ($dst$$reg != $src$$reg) {
8996 __ movl($dst$$Register, $src$$Register);
8997 }
8998 %}
8999 ins_pipe(ialu_reg_reg); // XXX
9000 %}
9001
// CastP2X rule whose source operand is a narrow-pointer register (rRegN) and
// whose result is a long. Emits a movptr, skipped when dst and src share a
// register.
// NOTE(review): the rule name and operand class say "narrow", while the
// matched node is CastP2X and the format comment says "ptr -> long" --
// confirm this pairing is intended.
9002 instruct castN2X(rRegL dst, rRegN src)
9003 %{
9004 match(Set dst (CastP2X src));
9005
9006 format %{ "movq $dst, $src\t# ptr -> long" %}
9007 ins_encode %{
// No instruction is emitted when source and destination coincide.
9008 if ($dst$$reg != $src$$reg) {
9009 __ movptr($dst$$Register, $src$$Register);
9010 }
9011 %}
9012 ins_pipe(ialu_reg_reg); // XXX
9013 %}
9014
// CastP2X (pointer -> long). A register-to-register movptr that is skipped
// entirely when the allocator assigned dst and src the same register.
9015 instruct castP2X(rRegL dst, rRegP src)
9016 %{
9017 match(Set dst (CastP2X src));
9018
9019 format %{ "movq $dst, $src\t# ptr -> long" %}
9020 ins_encode %{
// No instruction is emitted when source and destination coincide.
9021 if ($dst$$reg != $src$$reg) {
9022 __ movptr($dst$$Register, $src$$Register);
9023 }
9024 %}
9025 ins_pipe(ialu_reg_reg); // XXX
9026 %}
9027
9028 // Convert oop into int for vectors alignment masking
// Folds (ConvL2I (CastP2X src)) into one 32-bit move from the pointer
// register. Unlike the cast rules above, the movl is emitted unconditionally,
// with no same-register check (presumably because the 32-bit move itself
// performs the truncation -- confirm).
9029 instruct convP2I(rRegI dst, rRegP src)
9030 %{
9031 match(Set dst (ConvL2I (CastP2X src)));
9032
9033 format %{ "movl $dst, $src\t# ptr -> int" %}
9034 ins_encode %{
9035 __ movl($dst$$Register, $src$$Register);
9036 %}
9037 ins_pipe(ialu_reg_reg); // XXX
9038 %}
9039
9040 // Convert compressed oop into int for vectors alignment masking
9041 // in case of 32bit oops (heap < 4Gb).
// Folds (ConvL2I (CastP2X (DecodeN src))) into one 32-bit move taken
// directly from the compressed-oop register, skipping the decode.
// Only applies when CompressedOops::shift() == 0.
9042 instruct convN2I(rRegI dst, rRegN src)
9043 %{
9044 predicate(CompressedOops::shift() == 0);
9045 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9046
9047 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
9048 ins_encode %{
9049 __ movl($dst$$Register, $src$$Register);
9050 %}
9051 ins_pipe(ialu_reg_reg); // XXX
9052 %}
9053
9054 // Convert oop pointer into compressed form
// EncodeP for results whose pointer type is not TypePtr::NotNull.
// Copies src into dst when they differ, then encodes dst in place with
// encode_heap_oop. Kills the flags register.
9055 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
9056 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9057 match(Set dst (EncodeP src));
9058 effect(KILL cr);
9059 format %{ "encode_heap_oop $dst,$src" %}
9060 ins_encode %{
9061 Register s = $src$$Register;
9062 Register d = $dst$$Register;
// encode_heap_oop works on a single register, so stage the value in d first.
9063 if (s != d) {
9064 __ movq(d, s);
9065 }
9066 __ encode_heap_oop(d);
9067 %}
9068 ins_pipe(ialu_reg_long);
9069 %}
9070
// EncodeP for results whose pointer type is TypePtr::NotNull.
// Uses the two-register encode_heap_oop_not_null(dst, src) form directly,
// with no separate copy. Kills the flags register.
9071 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9072 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
9073 match(Set dst (EncodeP src));
9074 effect(KILL cr);
9075 format %{ "encode_heap_oop_not_null $dst,$src" %}
9076 ins_encode %{
9077 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9078 %}
9079 ins_pipe(ialu_reg_long);
9080 %}
9081
// DecodeN for results whose pointer type is neither TypePtr::NotNull nor
// TypePtr::Constant. Copies src into dst when they differ, then decodes dst
// in place with decode_heap_oop. Kills the flags register.
9082 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
9083 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9084 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9085 match(Set dst (DecodeN src));
9086 effect(KILL cr);
9087 format %{ "decode_heap_oop $dst,$src" %}
9088 ins_encode %{
9089 Register s = $src$$Register;
9090 Register d = $dst$$Register;
// decode_heap_oop works on a single register, so stage the value in d first.
9091 if (s != d) {
9092 __ movq(d, s);
9093 }
9094 __ decode_heap_oop(d);
9095 %}
9096 ins_pipe(ialu_reg_long);
9097 %}
9098
// DecodeN for results whose pointer type is TypePtr::NotNull or
// TypePtr::Constant (the complement of decodeHeapOop's predicate).
// Picks the two-register decode when src and dst differ and the in-place
// one-register decode when they are the same. Kills the flags register.
9099 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9100 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9101 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9102 match(Set dst (DecodeN src));
9103 effect(KILL cr);
9104 format %{ "decode_heap_oop_not_null $dst,$src" %}
9105 ins_encode %{
9106 Register s = $src$$Register;
9107 Register d = $dst$$Register;
9108 if (s != d) {
9109 __ decode_heap_oop_not_null(d, s);
9110 } else {
9111 __ decode_heap_oop_not_null(d);
9112 }
9113 %}
9114 ins_pipe(ialu_reg_long);
9115 %}
9116
// EncodePKlass via encode_and_move_klass_not_null(dst, src).
// dst is declared TEMP in addition to being the result, and the flags
// register is killed. There is no predicate, so this is the only
// EncodePKlass rule visible in this section.
9117 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9118 match(Set dst (EncodePKlass src));
9119 effect(TEMP dst, KILL cr);
9120 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9121 ins_encode %{
9122 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9123 %}
9124 ins_pipe(ialu_reg_long);
9125 %}
9126
// DecodeNKlass via decode_and_move_klass_not_null(dst, src).
// dst is declared TEMP in addition to being the result, and the flags
// register is killed.
9127 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9128 match(Set dst (DecodeNKlass src));
9129 effect(TEMP dst, KILL cr);
9130 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9131 ins_encode %{
9132 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9133 %}
9134 ins_pipe(ialu_reg_long);
9135 %}
9136
9137 //----------Conditional Move---------------------------------------------------
9138 // Jump
9139 // dummy instruction for generating temp registers
// Jump-table dispatch for (Jump (LShiftL switch_val shift)).
// predicate(false) means this rule is never selected as written.
// The encoding loads the table address ($constantaddress) into the TEMP
// register dest and jumps through [dest + switch_val << shift].
9140 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9141 match(Jump (LShiftL switch_val shift));
9142 ins_cost(350);
9143 predicate(false);
9144 effect(TEMP dest);
9145
9146 format %{ "leaq $dest, [$constantaddress]\n\t"
9147 "jmp [$dest + $switch_val << $shift]\n\t" %}
9148 ins_encode %{
9149 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9150 // to do that and the compiler is using that register as one it can allocate.
9151 // So we build it all by hand.
9152 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9153 // ArrayAddress dispatch(table, index);
// The Address only names registers; dest receives the table base in the lea below.
9154 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9155 __ lea($dest$$Register, $constantaddress);
9156 __ jmp(dispatch);
9157 %}
9158 ins_pipe(pipe_jmp);
9159 %}
9160
// Jump-table dispatch for (Jump (AddL (LShiftL switch_val shift) offset)).
// The encoding loads the table address ($constantaddress) into the TEMP
// register dest and jumps through [dest + switch_val << shift + offset],
// with the immL32 offset narrowed to int for the displacement.
9161 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9162 match(Jump (AddL (LShiftL switch_val shift) offset));
9163 ins_cost(350);
9164 effect(TEMP dest);
9165
9166 format %{ "leaq $dest, [$constantaddress]\n\t"
9167 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9168 ins_encode %{
9169 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9170 // to do that and the compiler is using that register as one it can allocate.
9171 // So we build it all by hand.
9172 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9173 // ArrayAddress dispatch(table, index);
// The Address only names registers; dest receives the table base in the lea below.
9174 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9175 __ lea($dest$$Register, $constantaddress);
9176 __ jmp(dispatch);
9177 %}
9178 ins_pipe(pipe_jmp);
9179 %}
9180
// Jump-table dispatch for a plain (Jump switch_val).
// The encoding loads the table address ($constantaddress) into the TEMP
// register dest and jumps through [dest + switch_val] (scale times_1).
9181 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9182 match(Jump switch_val);
9183 ins_cost(350);
9184 effect(TEMP dest);
9185
9186 format %{ "leaq $dest, [$constantaddress]\n\t"
9187 "jmp [$dest + $switch_val]\n\t" %}
9188 ins_encode %{
9189 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9190 // to do that and the compiler is using that register as one it can allocate.
9191 // So we build it all by hand.
9192 // Address index(noreg, switch_reg, Address::times_1);
9193 // ArrayAddress dispatch(table, index);
// The Address only names registers; dest receives the table base in the lea below.
9194 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9195 __ lea($dest$$Register, $constantaddress);
9196 __ jmp(dispatch);
9197 %}
9198 ins_pipe(pipe_jmp);
9199 %}
9200
9201 // Conditional move
// CMoveI special case for the signed flags (rFlagsReg/cmpOp): the matched
// value pair is (immI_1 src, dst) and the predicate additionally requires
// n->in(2)->in(2) to be the int constant 0. Rather than a cmov, the encoding
// emits one setb of the negated condition into dst. Cost 100, which is lower
// than the generic cmov rules (200).
9202 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9203 %{
9204 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9205 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9206
9207 ins_cost(100); // XXX
9208 format %{ "setbn$cop $dst\t# signed, int" %}
9209 ins_encode %{
9210 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9211 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9212 %}
9213 ins_pipe(ialu_reg);
9214 %}
9215
// Two-operand CMoveI for the signed flags (rFlagsReg/cmpOp), selected when
// UseAPX is off. dst is both an input of the match rule and the result;
// a single cmovl with condition $cop moves src into dst.
9216 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9217 %{
9218 predicate(!UseAPX);
9219 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9220
9221 ins_cost(200); // XXX
9222 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9223 ins_encode %{
9224 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9225 %}
9226 ins_pipe(pipe_cmov_reg);
9227 %}
9228
// Three-operand CMoveI for the signed flags (rFlagsReg/cmpOp), selected when
// UseAPX is on. dst is separate from both inputs; emits ecmovl with
// condition $cop over (dst, src1, src2). "ndd" is the tag used in the format
// string for this non-destructive form.
9229 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9230 %{
9231 predicate(UseAPX);
9232 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9233
9234 ins_cost(200);
9235 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9236 ins_encode %{
9237 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9238 %}
9239 ins_pipe(pipe_cmov_reg);
9240 %}
9241
// CMoveI special case for the unsigned flags (rFlagsRegU/cmpOpU): the
// matched value pair is (immI_1 src, dst) and the predicate additionally
// requires n->in(2)->in(2) to be the int constant 0. Emits one setb of the
// negated condition into dst instead of a cmov. Cost 100.
9242 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9243 %{
9244 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9245 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9246
9247 ins_cost(100); // XXX
9248 format %{ "setbn$cop $dst\t# unsigned, int" %}
9249 ins_encode %{
9250 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9251 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9252 %}
9253 ins_pipe(ialu_reg);
9254 %}
9255
// Two-operand CMoveI for the unsigned flags (rFlagsRegU/cmpOpU), selected
// when UseAPX is off. dst is both an input and the result; emits one cmovl.
// Also the expansion target of cmovI_regUCF, which passes its operands in
// this (cop, cr, dst, src) order.
9256 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9257 predicate(!UseAPX);
9258 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9259
9260 ins_cost(200); // XXX
9261 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9262 ins_encode %{
9263 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9264 %}
9265 ins_pipe(pipe_cmov_reg);
9266 %}
9267
// Three-operand CMoveI for the unsigned flags (rFlagsRegU/cmpOpU), selected
// when UseAPX is on. Emits ecmovl with condition $cop over
// (dst, src1, src2); dst is separate from both inputs.
9268 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9269 predicate(UseAPX);
9270 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9271
9272 ins_cost(200);
9273 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9274 ins_encode %{
9275 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9276 %}
9277 ins_pipe(pipe_cmov_reg);
9278 %}
9279
// CMoveI special case for the rFlagsRegUCF/cmpOpUCF operand classes: the
// matched value pair is (immI_1 src, dst) and the predicate additionally
// requires n->in(2)->in(2) to be the int constant 0. Emits one setb of the
// negated condition into dst instead of a cmov. Cost 100.
9280 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9281 %{
9282 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9283 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9284
9285 ins_cost(100); // XXX
9286 format %{ "setbn$cop $dst\t# unsigned, int" %}
9287 ins_encode %{
9288 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9289 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9290 %}
9291 ins_pipe(ialu_reg);
9292 %}
9293
// CMoveI special case for the rFlagsRegUCFE/cmpOpUCFE operand classes: the
// matched value pair is (immI_1 src, dst) and the predicate additionally
// requires n->in(2)->in(2) to be the int constant 0. Emits one setb of the
// negated condition into dst instead of a cmov. Cost 100.
9294 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9295 %{
9296 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9297 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9298
9299 ins_cost(100); // XXX
9300 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9301 ins_encode %{
9302 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9303 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9304 %}
9305 ins_pipe(ialu_reg);
9306 %}
9307
// CMoveI for the rFlagsRegUCF/cmpOpUCF operand classes. Has no encoding of
// its own: it expands to cmovI_regU with the same operands, so the emitted
// code is that rule's single cmovl.
9308 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9309 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9310
9311 ins_cost(200);
9312 expand %{
9313 cmovI_regU(cop, cr, dst, src);
9314 %}
9315 %}
9316
// Three-operand CMoveI for the rFlagsRegUCFE/cmpOpUCFE operand classes.
// Emits ecmovl with condition $cop over (dst, src1, src2). Unlike the other
// *_ndd rules here it carries no UseAPX predicate (presumably the UCFE
// operand classes are only produced under APX -- confirm).
9317 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9318 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9319
9320 ins_cost(200);
9321 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9322 ins_encode %{
9323 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9324 %}
9325 ins_pipe(pipe_cmov_reg);
9326 %}
9327
// CMoveI for cmpOpUCF2 when the Bool test is BoolTest::ne.
// Needs two conditional moves: src is moved into dst if the parity
// condition holds, and again if notEqual holds, so either condition
// selects src.
9328 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9329 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9330 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9331
9332 ins_cost(200); // XXX
9333 format %{ "cmovpl $dst, $src\n\t"
9334 "cmovnel $dst, $src" %}
9335 ins_encode %{
9336 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9337 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9338 %}
9339 ins_pipe(pipe_cmov_reg);
9340 %}
9341
9342 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9343 // inputs of the CMove
// CMoveI for cmpOpUCF2 when the Bool test is BoolTest::eq.
// The match rule lists the value inputs as (src dst), the reverse of the
// ne rule, and then emits the same parity/notEqual cmovl pair.
// NOTE(review): this rule declares effect(TEMP dst) while the N and P
// *_regUCF2_eq rules in this file do not -- confirm the difference is intended.
9344 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9345 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9346 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9347 effect(TEMP dst);
9348
9349 ins_cost(200); // XXX
9350 format %{ "cmovpl $dst, $src\n\t"
9351 "cmovnel $dst, $src" %}
9352 ins_encode %{
9353 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9354 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9355 %}
9356 ins_pipe(pipe_cmov_reg);
9357 %}
9358
9359 // Conditional move
// CMoveI for the signed flags (rFlagsReg/cmpOp) with the second value
// loaded from memory: matches (Binary dst (LoadI src)) and emits cmovl with
// a memory source operand. Cost 250.
9360 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9361 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9362
9363 ins_cost(250); // XXX
9364 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9365 ins_encode %{
9366 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9367 %}
9368 ins_pipe(pipe_cmov_mem);
9369 %}
9370
9371 // Conditional move
// CMoveI for the unsigned flags (rFlagsRegU/cmpOpU) with the second value
// loaded from memory: matches (Binary dst (LoadI src)) and emits cmovl with
// a memory source operand. Cost 250. Also the expansion target of
// cmovI_memUCF.
9372 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9373 %{
9374 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9375
9376 ins_cost(250); // XXX
9377 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9378 ins_encode %{
9379 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9380 %}
9381 ins_pipe(pipe_cmov_mem);
9382 %}
9383
// Memory-source CMoveI for the rFlagsRegUCF/cmpOpUCF operand classes.
// Has no encoding of its own: it expands to cmovI_memU with the same
// operands.
9384 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9385 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9386
9387 ins_cost(250);
9388 expand %{
9389 cmovI_memU(cop, cr, dst, src);
9390 %}
9391 %}
9392
// Memory-source CMoveI for the rFlagsRegUCFE/cmpOpUCFE operand classes.
// Emits cmovl with a memory source operand directly (no expand). Cost 250.
// NOTE(review): the format comment reads "unsigned, int" whereas the other
// UCFE rules in this file print "signed, unsigned, ..." -- confirm which
// wording is intended.
9393 instruct cmovI_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI dst, memory src) %{
9394 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9395
9396 ins_cost(250); // XXX
9397 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9398 ins_encode %{
9399 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9400 %}
9401 ins_pipe(pipe_cmov_mem);
9402 %}
9403
9404 // Conditional move
// Two-operand CMoveN (compressed pointer) for the signed flags
// (rFlagsReg/cmpOp), selected when UseAPX is off. dst is both an input and
// the result; emits a 32-bit cmovl.
9405 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9406 %{
9407 predicate(!UseAPX);
9408 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9409
9410 ins_cost(200); // XXX
9411 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9412 ins_encode %{
9413 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9414 %}
9415 ins_pipe(pipe_cmov_reg);
9416 %}
9417
9418 // Conditional move ndd
// Three-operand CMoveN (compressed pointer) for the signed flags
// (rFlagsReg/cmpOp), selected when UseAPX is on. Emits ecmovl with
// condition $cop over (dst, src1, src2); dst is separate from both inputs.
9419 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9420 %{
9421 predicate(UseAPX);
9422 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9423
9424 ins_cost(200);
9425 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9426 ins_encode %{
9427 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9428 %}
9429 ins_pipe(pipe_cmov_reg);
9430 %}
9431
9432 // Conditional move
// Two-operand CMoveN (compressed pointer) for the unsigned flags
// (rFlagsRegU/cmpOpU), selected when UseAPX is off. Emits a 32-bit cmovl.
// Also the expansion target of cmovN_regUCF, which passes its operands in
// this (cop, cr, dst, src) order.
9433 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9434 %{
9435 predicate(!UseAPX);
9436 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9437
9438 ins_cost(200); // XXX
9439 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9440 ins_encode %{
9441 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9442 %}
9443 ins_pipe(pipe_cmov_reg);
9444 %}
9445
// CMoveN for the rFlagsRegUCF/cmpOpUCF operand classes. Has no encoding of
// its own: it expands to cmovN_regU with the same operands.
9446 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9447 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9448
9449 ins_cost(200);
9450 expand %{
9451 cmovN_regU(cop, cr, dst, src);
9452 %}
9453 %}
9454
9455 // Conditional move ndd
// Three-operand CMoveN (compressed pointer) for the unsigned flags
// (rFlagsRegU/cmpOpU), selected when UseAPX is on. Emits ecmovl with
// condition $cop over (dst, src1, src2).
9456 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9457 %{
9458 predicate(UseAPX);
9459 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9460
9461 ins_cost(200);
9462 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9463 ins_encode %{
9464 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9465 %}
9466 ins_pipe(pipe_cmov_reg);
9467 %}
9468
// Three-operand CMoveN (compressed pointer) for the
// rFlagsRegUCFE/cmpOpUCFE operand classes. Emits ecmovl with condition $cop
// over (dst, src1, src2). Carries no UseAPX predicate of its own.
9469 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9470 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9471
9472 ins_cost(200);
9473 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9474 ins_encode %{
9475 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9476 %}
9477 ins_pipe(pipe_cmov_reg);
9478 %}
9479
// CMoveN (compressed pointer) for cmpOpUCF2 when the Bool test is
// BoolTest::ne. Emits two 32-bit conditional moves of src into dst, one on
// parity and one on notEqual, so either condition selects src.
9480 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9481 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9482 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9483
9484 ins_cost(200); // XXX
9485 format %{ "cmovpl $dst, $src\n\t"
9486 "cmovnel $dst, $src" %}
9487 ins_encode %{
9488 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9489 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9490 %}
9491 ins_pipe(pipe_cmov_reg);
9492 %}
9493
9494 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9495 // inputs of the CMove
// CMoveN (compressed pointer) for cmpOpUCF2 when the Bool test is
// BoolTest::eq. The match rule lists the value inputs as (src dst), the
// reverse of the ne rule, and then emits the same parity/notEqual cmovl pair.
9496 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9497 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9498 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9499
9500 ins_cost(200); // XXX
9501 format %{ "cmovpl $dst, $src\n\t"
9502 "cmovnel $dst, $src" %}
9503 ins_encode %{
9504 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9505 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9506 %}
9507 ins_pipe(pipe_cmov_reg);
9508 %}
9509
9510 // Conditional move
// Two-operand CMoveP (pointer) for the signed flags (rFlagsReg/cmpOp),
// selected when UseAPX is off. dst is both an input and the result; emits a
// 64-bit cmovq.
9511 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9512 %{
9513 predicate(!UseAPX);
9514 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9515
9516 ins_cost(200); // XXX
9517 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9518 ins_encode %{
9519 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9520 %}
9521 ins_pipe(pipe_cmov_reg); // XXX
9522 %}
9523
9524 // Conditional move ndd
// Three-operand CMoveP (pointer) for the signed flags (rFlagsReg/cmpOp),
// selected when UseAPX is on. Emits ecmovq with condition $cop over
// (dst, src1, src2); dst is separate from both inputs.
9525 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9526 %{
9527 predicate(UseAPX);
9528 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9529
9530 ins_cost(200);
9531 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9532 ins_encode %{
9533 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9534 %}
9535 ins_pipe(pipe_cmov_reg);
9536 %}
9537
9538 // Conditional move
// Two-operand CMoveP (pointer) for the unsigned flags (rFlagsRegU/cmpOpU),
// selected when UseAPX is off. Emits a 64-bit cmovq. Also the expansion
// target of cmovP_regUCF, which passes its operands in this
// (cop, cr, dst, src) order.
9539 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9540 %{
9541 predicate(!UseAPX);
9542 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9543
9544 ins_cost(200); // XXX
9545 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9546 ins_encode %{
9547 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9548 %}
9549 ins_pipe(pipe_cmov_reg); // XXX
9550 %}
9551
9552 // Conditional move ndd
// Three-operand CMoveP (pointer) for the unsigned flags
// (rFlagsRegU/cmpOpU), selected when UseAPX is on. Emits ecmovq with
// condition $cop over (dst, src1, src2).
9553 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9554 %{
9555 predicate(UseAPX);
9556 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9557
9558 ins_cost(200);
9559 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9560 ins_encode %{
9561 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9562 %}
9563 ins_pipe(pipe_cmov_reg);
9564 %}
9565
// CMoveP for the rFlagsRegUCF/cmpOpUCF operand classes. Has no encoding of
// its own: it expands to cmovP_regU with the same operands.
9566 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9567 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9568
9569 ins_cost(200);
9570 expand %{
9571 cmovP_regU(cop, cr, dst, src);
9572 %}
9573 %}
9574
// Three-operand CMoveP (pointer) for the rFlagsRegUCFE/cmpOpUCFE operand
// classes. Emits ecmovq with condition $cop over (dst, src1, src2).
// Carries no UseAPX predicate of its own.
9575 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9576 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9577
9578 ins_cost(200);
9579 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9580 ins_encode %{
9581 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9582 %}
9583 ins_pipe(pipe_cmov_reg);
9584 %}
9585
// CMoveP (pointer) for cmpOpUCF2 when the Bool test is BoolTest::ne.
// Emits two 64-bit conditional moves of src into dst, one on parity and one
// on notEqual, so either condition selects src.
9586 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9587 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9588 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9589
9590 ins_cost(200); // XXX
9591 format %{ "cmovpq $dst, $src\n\t"
9592 "cmovneq $dst, $src" %}
9593 ins_encode %{
9594 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9595 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9596 %}
9597 ins_pipe(pipe_cmov_reg);
9598 %}
9599
9600 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9601 // inputs of the CMove
// CMoveP (pointer) for cmpOpUCF2 when the Bool test is BoolTest::eq.
// The match rule lists the value inputs as (src dst), the reverse of the ne
// rule, and then emits the same parity/notEqual cmovq pair.
9602 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9603 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9604 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9605
9606 ins_cost(200); // XXX
9607 format %{ "cmovpq $dst, $src\n\t"
9608 "cmovneq $dst, $src" %}
9609 ins_encode %{
9610 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9611 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9612 %}
9613 ins_pipe(pipe_cmov_reg);
9614 %}
9615
// CMoveL special case for the signed flags (rFlagsReg/cmpOp): the matched
// value pair is (immL1 src, dst) and the predicate additionally requires
// n->in(2)->in(2) to be the long constant 0. Emits one setb of the negated
// condition into dst instead of a cmov. Cost 100.
9616 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9617 %{
9618 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9619 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9620
9621 ins_cost(100); // XXX
9622 format %{ "setbn$cop $dst\t# signed, long" %}
9623 ins_encode %{
9624 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9625 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9626 %}
9627 ins_pipe(ialu_reg);
9628 %}
9629
// Two-operand CMoveL for the signed flags (rFlagsReg/cmpOp), selected when
// UseAPX is off. dst is both an input and the result; emits a 64-bit cmovq.
9630 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9631 %{
9632 predicate(!UseAPX);
9633 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9634
9635 ins_cost(200); // XXX
9636 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9637 ins_encode %{
9638 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9639 %}
9640 ins_pipe(pipe_cmov_reg); // XXX
9641 %}
9642
// Three-operand CMoveL for the signed flags (rFlagsReg/cmpOp), selected
// when UseAPX is on. Emits ecmovq with condition $cop over
// (dst, src1, src2); dst is separate from both inputs.
9643 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9644 %{
9645 predicate(UseAPX);
9646 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9647
9648 ins_cost(200);
9649 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9650 ins_encode %{
9651 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9652 %}
9653 ins_pipe(pipe_cmov_reg);
9654 %}
9655
// CMoveL for the signed flags (rFlagsReg/cmpOp) with the second value
// loaded from memory: matches (Binary dst (LoadL src)) and emits cmovq with
// a memory source operand.
// NOTE(review): cost is 200 here, while the int memory form (cmovI_mem) in
// this file uses 250 -- confirm the difference is intended.
9656 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9657 %{
9658 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9659
9660 ins_cost(200); // XXX
9661 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9662 ins_encode %{
9663 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9664 %}
9665 ins_pipe(pipe_cmov_mem); // XXX
9666 %}
9667
// CMoveL special case for the unsigned flags (rFlagsRegU/cmpOpU): the
// matched value pair is (immL1 src, dst) and the predicate additionally
// requires n->in(2)->in(2) to be the long constant 0. Emits one setb of the
// negated condition into dst instead of a cmov. Cost 100.
9668 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9669 %{
9670 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9671 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9672
9673 ins_cost(100); // XXX
9674 format %{ "setbn$cop $dst\t# unsigned, long" %}
9675 ins_encode %{
9676 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9677 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9678 %}
9679 ins_pipe(ialu_reg);
9680 %}
9681
// Two-operand conditional move between long registers (unsigned condition
// codes). Only selected when APX is not in use.
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9694
// Three-operand (NDD) conditional move between long registers, unsigned
// condition codes. Only selected when UseAPX is set.
instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9707
// Constant 1/0 long conditional move for cmpOpUCF / rFlagsRegUCF operands:
// with immL1 as the first value input and the constant 0 (checked by the
// predicate) as the second, a setb on the negated condition is sufficient.
instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop) %{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    // setb is driven by the inverse of the CMove condition.
    const Assembler::Condition negated =
        MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9721
// Constant 1/0 long conditional move for cmpOpUCFE / rFlagsRegUCFE operands:
// with immL1 as the first value input and the constant 0 (checked by the
// predicate) as the second, a setb on the negated condition is sufficient.
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop) %{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    // setb is driven by the inverse of the CMove condition.
    const Assembler::Condition negated =
        MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9735
// Long register conditional move for cmpOpUCF / rFlagsRegUCF operands. It has
// no encoding of its own: the rule expands to cmovL_regU with the same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
9744
// Three-operand (NDD) conditional move between long registers for
// cmpOpUCFE / rFlagsRegUCFE operands; emitted as ecmovq.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9756
// Long conditional move for a cmpOpUCF2 "not equal" test (the predicate checks
// BoolTest::ne). The move is performed when either the parity flag is set or
// the flags say not-equal, hence the pair of cmovq instructions.
// NOTE(review): the parity case presumably covers an unordered floating-point
// compare -- confirm against the producer of rFlagsRegUCF.
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9770
// Companion rule for the cmpOpUCF2 "equal" test (the predicate checks
// BoolTest::eq). Because (x == y) == !(x != y), the sense of the test is
// flipped by swapping the two value inputs of the CMove in the match rule;
// the emitted parity / not-equal cmovq pair is the same as for the ne rule.
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9786
// Conditional move of a long loaded from memory into dst (unsigned condition
// codes); the LoadL is folded into the cmovq memory operand.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9798
// Memory-source long conditional move for cmpOpUCF / rFlagsRegUCF operands.
// It has no encoding of its own: the rule expands to cmovL_memU.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
9807
// Conditional move of a long loaded from memory into dst for
// cmpOpUCFE / rFlagsRegUCFE operands; the LoadL is folded into the cmovq
// memory operand.
// The format annotation reads "signed, unsigned" to agree with the other
// UCFE rules (cmovL_imm_01UCFE, cmovL_regUCFE_ndd, cmovF/D_regUCFE); it
// previously said only "unsigned". The emitted code is unchanged.
instruct cmovL_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9818
// Conditional move of a float register (signed condition codes). It is
// implemented as a short branch around a movflt rather than a cmov instruction.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label done;
    // Branch on the inverse of the CMOV condition (low bit flipped) so the
    // move is skipped when the condition does not hold.
    const Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    __ jccb(inverted, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9836
// Conditional move of a float register (unsigned condition codes). It is
// implemented as a short branch around a movflt rather than a cmov instruction.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label done;
    // Branch on the inverse of the CMOV condition (low bit flipped) so the
    // move is skipped when the condition does not hold.
    const Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    __ jccb(inverted, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9854
// Float conditional move for cmpOpUCF / rFlagsRegUCF operands. It has no
// encoding of its own: the rule expands to cmovF_regU.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
9863
// Conditional move of a float register for cmpOpUCFE / rFlagsRegUCFE operands.
// It is implemented as a short branch around a movflt.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label done;
    // Branch on the inverse of the CMOV condition (low bit flipped) so the
    // move is skipped when the condition does not hold.
    const Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    __ jccb(inverted, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9881
// Conditional move of a double register (signed condition codes). It is
// implemented as a short branch around a movdbl rather than a cmov instruction.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label done;
    // Branch on the inverse of the CMOV condition (low bit flipped) so the
    // move is skipped when the condition does not hold.
    const Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    __ jccb(inverted, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9899
// Conditional move of a double register (unsigned condition codes). It is
// implemented as a short branch around a movdbl rather than a cmov instruction.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label done;
    // Branch on the inverse of the CMOV condition (low bit flipped) so the
    // move is skipped when the condition does not hold.
    const Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    __ jccb(inverted, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9917
// Double conditional move for cmpOpUCF / rFlagsRegUCF operands. It has no
// encoding of its own: the rule expands to cmovD_regU.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
9926
// Conditional move of a double register for cmpOpUCFE / rFlagsRegUCFE
// operands. It is implemented as a short branch around a movdbl.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label done;
    // Branch on the inverse of the CMOV condition (low bit flipped) so the
    // move is skipped when the condition does not hold.
    const Assembler::Condition inverted = (Assembler::Condition)($cop$$cmpcode ^ 1);
    __ jccb(inverted, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9944
9945 //----------Arithmetic Instructions--------------------------------------------
9946 //----------Addition Instructions----------------------------------------------
9947
// 32-bit register-register add in two-operand form (dst = dst + src); selected
// when APX is not in use. The flags register is killed and the flag(...) list
// records which condition flags the instruction sets.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addl(accum, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
9960
// 32-bit register-register add in three-operand NDD form (dst = src1 + src2);
// selected when UseAPX is set. Both source operands are flagged as
// NDD-demotable in addition to the condition-flag attributes.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eaddl(result, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
9974
// 32-bit add of an immediate to a register in two-operand form
// (dst = dst + src); selected when APX is not in use. Kills the flags register.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addl(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
9988
// 32-bit add of an immediate to a register in three-operand NDD form
// (dst = src1 + src2); selected when UseAPX is set. Only the register source
// (operand 1) is flagged as NDD-demotable.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eaddl(result, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
10002
// 32-bit add of a memory operand to a register (dst = dst + [src]); the LoadI
// is folded into the addl. Kills the flags register.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addl(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10016
// 32-bit read-modify-write add: the load, add and store of the same memory
// location are matched as one addl with a memory destination and a register
// source. Kills the flags register.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register addend = $src$$Register;
    __ addl($dst$$Address, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10030
// 32-bit read-modify-write add of an immediate: the load, add and store of the
// same memory location are matched as one addl with a memory destination.
// Kills the flags register.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10045
// 32-bit increment of a register: AddI with the constant 1 (immI_1) is emitted
// through incrementl. Requires UseIncDec and is selected only without APX.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementl(counter);
  %}
  ins_pipe(ialu_reg);
%}
10058
// 32-bit increment in NDD form (dst = src + 1): AddI with the constant 1
// (immI_1) is emitted as eincl. Requires both UseAPX and UseIncDec; the source
// register is flagged as NDD-demotable.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eincl(result, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10072
// 32-bit read-modify-write increment of a memory location: load, add of the
// constant 1 (immI_1) and store are matched as one incrementl on the address.
// Requires UseIncDec.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10086
// 32-bit decrement of a register. The rule matches AddI with the constant -1
// (immI_M1) rather than a subtraction.
// XXX original question: why does this use AddI? Presumably the ideal graph
// presents "x - 1" as "x + (-1)" -- TODO confirm.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementl(counter);
  %}
  ins_pipe(ialu_reg);
%}
10100
// 32-bit decrement in NDD form (dst = src - 1): AddI with the constant -1
// (immI_M1) is emitted as edecl. Requires both UseAPX and UseIncDec; the source
// register is flagged as NDD-demotable.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ edecl(result, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10114
// 32-bit read-modify-write decrement of a memory location. The rule matches
// the load, an AddI with the constant -1 (immI_M1) and the store, and emits
// decrementl on the address. Requires UseIncDec.
// XXX original question: why does this use AddI? Presumably the ideal graph
// presents "x - 1" as "x + (-1)" -- TODO confirm.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10129
// Folds (index << scale) + disp into a single leal with no base register.
// Enabled only when VM_Version::supports_fast_2op_lea() holds. No flags
// register operand is declared for this rule.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift count is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10142
// Folds (base + index) + disp into a single leal with scale factor times_1.
// Enabled only when VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leal(result, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10154
// Folds base + (index << scale) into a single leal without displacement.
// Enabled only when VM_Version::supports_fast_2op_lea() holds; the base
// operand is restricted to the no_rbp_r13_RegI class.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    // The shift count is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, factor));
  %}
  ins_pipe(ialu_reg_reg);
%}
10167
// Folds (base + (index << scale)) + disp into a single leal.
// Enabled only when VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift count is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10180
// 64-bit register-register add in two-operand form (dst = dst + src); selected
// when APX is not in use. The flags register is killed and the flag(...) list
// records which condition flags the instruction sets.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addq(accum, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10194
// 64-bit register-register add in three-operand NDD form (dst = src1 + src2);
// selected when UseAPX is set. Both source operands are flagged as
// NDD-demotable in addition to the condition-flag attributes.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eaddq(result, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10208
// 64-bit add of an immL32 immediate to a register in two-operand form
// (dst = dst + src); selected when APX is not in use. Kills the flags register.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addq(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10222
// 64-bit add of an immL32 immediate to a register in three-operand NDD form
// (dst = src1 + src2); selected when UseAPX is set. Only the register source
// (operand 1) is flagged as NDD-demotable.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eaddq(result, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
10236
// 64-bit add of a memory operand to a register (dst = dst + [src]); the LoadL
// is folded into the addq. Kills the flags register.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addq(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10250
// 64-bit read-modify-write add: the load, add and store of the same memory
// location are matched as one addq with a memory destination and a register
// source. Kills the flags register.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register addend = $src$$Register;
    __ addq($dst$$Address, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10264
// 64-bit read-modify-write add of an immL32 immediate: the load, add and store
// of the same memory location are matched as one addq with a memory
// destination. Kills the flags register.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10278
// 64-bit increment of a register: AddL with the constant 1 (immL1) is emitted
// through incrementq. Requires UseIncDec and is selected only without APX.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10291
// 64-bit increment in NDD form (dst = src + 1): AddL with the constant 1
// (immL1) is emitted as eincq. Requires both UseAPX and UseIncDec; the source
// register is flagged as NDD-demotable.
//
// The source operand is a long register (rRegL). It was previously declared
// as rRegI, which does not agree with the long input of AddL nor with the
// sibling rule decL_rReg_ndd.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10305
// 64-bit read-modify-write increment of a memory location: load, add of the
// constant 1 (immL1) and store are matched as one incrementq on the address.
// Requires UseIncDec.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10319
// 64-bit decrement of a register. The rule matches AddL with the constant -1
// (immL_M1) rather than a subtraction.
// XXX original question: why does this use AddL? Presumably the ideal graph
// presents "x - 1" as "x + (-1)" -- TODO confirm.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10333
// 64-bit decrement in NDD form (dst = src - 1): AddL with the constant -1
// (immL_M1) is emitted as edecq. Requires both UseAPX and UseIncDec; the source
// register is flagged as NDD-demotable.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ edecq(result, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10347
// 64-bit read-modify-write decrement of a memory location. The rule matches
// the load, an AddL with the constant -1 (immL_M1) and the store, and emits
// decrementq on the address. Requires UseIncDec.
// XXX original question: why does this use AddL? Presumably the ideal graph
// presents "x - 1" as "x + (-1)" -- TODO confirm.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10362
// Folds (index << scale) + disp into a single leaq with no base register.
// Enabled only when VM_Version::supports_fast_2op_lea() holds. No flags
// register operand is declared for this rule.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift count is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address(noreg, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10375
// Folds (base + index) + disp into a single leaq with scale factor times_1.
// Enabled only when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10387
// Folds base + (index << scale) into a single leaq without displacement.
// Enabled only when VM_Version::supports_fast_2op_lea() holds; the base
// operand is restricted to the no_rbp_r13_RegL class.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift count is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, factor));
  %}
  ins_pipe(ialu_reg_reg);
%}
10400
// Folds (base + (index << scale)) + disp into a single leaq.
// Enabled only when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift count is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10413
// Pointer add: a long register offset is added to a pointer register in place
// (dst = dst + src) with addq. Kills the flags register.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    __ addq(pointer, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10426
// Pointer add of an immL32 immediate offset to a pointer register in place
// (dst = dst + src) with addq. Kills the flags register.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    __ addq(pointer, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10439
10440 // XXX addP mem ops ????
10441
// CheckCastPP on a pointer register: declared with size(0) and an empty
// encoding, so no machine code is emitted for it.
instruct checkCastPP(rRegP dst) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10451
// CastPP on a pointer register: declared with size(0) and an empty encoding,
// so no machine code is emitted for it.
instruct castPP(rRegP dst) %{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10461
// CastII when cast verification is disabled (VerifyConstraintCasts == 0):
// zero size, zero cost, empty encoding -- no machine code is emitted.
instruct castII(rRegI dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10473
// CastII when cast verification is enabled (VerifyConstraintCasts > 0): calls
// verify_int_in_range with this node's index, its int bottom type and the
// value register. Kills the flags register.
instruct castII_checked(rRegI dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ verify_int_in_range(_idx, bottom_type()->is_int(), value);
  %}
  ins_pipe(pipe_slow);
%}
10486
// CastLL when cast verification is disabled (VerifyConstraintCasts == 0):
// zero size, zero cost, empty encoding -- no machine code is emitted.
instruct castLL(rRegL dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10498
// Verified CastLL for the case where castLL_is_imm32(n) holds: calls
// verify_long_in_range with noreg in place of a temporary register.
// Selected only when VerifyConstraintCasts > 0. Kills the flags register.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), value, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10511
// Verified CastLL for the case where castLL_is_imm32(n) does not hold: calls
// verify_long_in_range and hands it an extra TEMP register.
// Selected only when VerifyConstraintCasts > 0. Kills the flags register.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    const Register value = $dst$$Register;
    const Register scratch = $tmp$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), value, scratch);
  %}
  ins_pipe(pipe_slow);
%}
10524
// CastFF on a float register: zero size, zero cost, empty encoding -- no
// machine code is emitted.
instruct castFF(regF dst) %{
  match(Set dst (CastFF dst));

  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10535
// CastHH on a regF operand: zero size, zero cost, empty encoding -- no
// machine code is emitted.
instruct castHH(regF dst) %{
  match(Set dst (CastHH dst));

  size(0);
  format %{ "# castHH of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10546
// CastDD on a double register: zero size, zero cost, empty encoding -- no
// machine code is emitted.
instruct castDD(regD dst) %{
  match(Set dst (CastDD dst));

  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10557
// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
//
// Pointer compare-and-swap returning a boolean in res. Handles both the strong
// and the weak ideal node, but only when the node carries no barrier data.
// oldval is pinned to rax and is killed together with the flags register.
instruct compareAndSwapP(rRegI res, memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    // lock prefix, 64-bit cmpxchg, then turn the "equal" flag into 0/1 in res.
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10579
// Long compare-and-swap returning a boolean in res. Handles both the strong
// and the weak ideal node. oldval is pinned to rax and is killed together with
// the flags register.
instruct compareAndSwapL(rRegI res, memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    // lock prefix, 64-bit cmpxchg, then turn the "equal" flag into 0/1 in res.
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10599
// Int compare-and-swap returning a boolean in res. Handles both the strong and
// the weak ideal node. oldval is pinned to rax and is killed together with the
// flags register.
instruct compareAndSwapI(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    // lock prefix, 32-bit cmpxchg, then turn the "equal" flag into 0/1 in res.
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10619
// Byte compare-and-swap returning a boolean in res. Handles both the strong
// and the weak ideal node. oldval is pinned to rax and is killed together with
// the flags register.
instruct compareAndSwapB(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    // lock prefix, 8-bit cmpxchg, then turn the "equal" flag into 0/1 in res.
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10639
// Short compare-and-swap returning a boolean in res. Handles both the strong
// and the weak ideal node. oldval is pinned to rax and is killed together with
// the flags register.
instruct compareAndSwapS(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    // lock prefix, 16-bit cmpxchg, then turn the "equal" flag into 0/1 in res.
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10659
// Narrow-oop compare-and-swap (strong and weak node forms) producing a flag in res.
// Only selected when the node carries no barrier data; uses the 32-bit cmpxchgl.
instruct compareAndSwapN(rRegI res, memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register new_reg = $newval$$Register;
    Register res_reg = $res$$Register;
    __ lock();
    __ cmpxchgl(new_reg, $mem_ptr$$Address);
    // Materialize the 'equal' condition as the result value.
    __ setcc(Assembler::equal, res_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10679
// Byte-wide compare-and-exchange. The node result is the oldval operand itself
// (an rax_RegI), so no separate result register or setcc is needed.
instruct compareAndExchangeB(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgb(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10696
// 16-bit compare-and-exchange. The node result is the oldval operand itself
// (an rax_RegI), so no separate result register or setcc is needed.
instruct compareAndExchangeS(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgw(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10713
// 32-bit compare-and-exchange. The node result is the oldval operand itself
// (an rax_RegI), so no separate result register or setcc is needed.
instruct compareAndExchangeI(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgl(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10730
// 64-bit compare-and-exchange. The node result is the oldval operand itself
// (an rax_RegL), so no separate result register or setcc is needed.
instruct compareAndExchangeL(memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgq(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10747
// Narrow-oop compare-and-exchange, selected only when the node has no barrier data.
// The node result is the oldval operand itself (an rax_RegN); uses 32-bit cmpxchgl.
instruct compareAndExchangeN(memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgl(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10764
// Pointer compare-and-exchange, selected only when the node has no barrier data.
// The node result is the oldval operand itself (an rax_RegP); uses 64-bit cmpxchgq.
instruct compareAndExchangeP(memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgq(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10782
// Byte get-and-add whose fetched value is unused (result_not_used()):
// a lock-prefixed addb with a register addend is emitted instead of xadd.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);

  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    Register addend = $add$$Register;
    __ lock();
    __ addb($mem$$Address, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10794
// Byte get-and-add whose fetched value is unused (result_not_used()):
// a lock-prefixed addb with an immediate addend is emitted instead of xadd.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);

  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    // Lock prefix first, then the memory-destination add.
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10806
// Byte get-and-add whose fetched value is consumed: lock xaddb leaves the
// value in newval, which is then narrowed to T_BYTE.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);

  format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ lock();
    __ xaddb($mem$$Address, val_reg);
    __ narrow_subword_type(val_reg, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10819
// 16-bit get-and-add whose fetched value is unused (result_not_used()):
// a lock-prefixed addw with a register addend is emitted instead of xadd.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);

  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    Register addend = $add$$Register;
    __ lock();
    __ addw($mem$$Address, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10831
// 16-bit get-and-add with an immediate addend and an unused fetched value.
// Additionally gated on UseStoreImmI16, unlike the other immediate variants.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);

  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    // Lock prefix first, then the memory-destination add.
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10843
// 16-bit get-and-add whose fetched value is consumed: lock xaddw leaves the
// value in newval, which is then narrowed to T_SHORT.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);

  format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ lock();
    __ xaddw($mem$$Address, val_reg);
    __ narrow_subword_type(val_reg, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10856
// 32-bit get-and-add whose fetched value is unused (result_not_used()):
// a lock-prefixed addl with a register addend is emitted instead of xadd.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);

  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    Register addend = $add$$Register;
    __ lock();
    __ addl($mem$$Address, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10868
// 32-bit get-and-add whose fetched value is unused (result_not_used()):
// a lock-prefixed addl with an immediate addend is emitted instead of xadd.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);

  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    // Lock prefix first, then the memory-destination add.
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10880
// 32-bit get-and-add whose fetched value is consumed: lock xaddl with newval
// serving as both the addend input and the result register.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);

  format %{ "xaddl_lock $mem, $newval" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ lock();
    __ xaddl($mem$$Address, val_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10892
// 64-bit get-and-add whose fetched value is unused (result_not_used()):
// a lock-prefixed addq with a register addend is emitted instead of xadd.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);

  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    Register addend = $add$$Register;
    __ lock();
    __ addq($mem$$Address, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10904
// 64-bit get-and-add whose fetched value is unused (result_not_used()):
// a lock-prefixed addq with an immL32 immediate addend is emitted instead of xadd.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);

  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    // Lock prefix first, then the memory-destination add.
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10916
// 64-bit get-and-add whose fetched value is consumed: lock xaddq with newval
// serving as both the addend input and the result register.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);

  format %{ "xaddq_lock $mem, $newval" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ lock();
    __ xaddq($mem$$Address, val_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10928
// Byte get-and-set: xchgb swaps newval with memory, then the register is
// narrowed to T_BYTE. No lock() call and no flags effect are declared here.
instruct xchgB(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetB mem newval));

  format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ xchgb(val_reg, $mem$$Address);
    __ narrow_subword_type(val_reg, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10938
// 16-bit get-and-set: xchgw swaps newval with memory, then the register is
// narrowed to T_SHORT. No lock() call and no flags effect are declared here.
instruct xchgS(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetS mem newval));

  format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ xchgw(val_reg, $mem$$Address);
    __ narrow_subword_type(val_reg, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10948
// 32-bit get-and-set: xchgl swaps newval with the memory operand, so newval
// is both the value to store and the result register.
instruct xchgI(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetI mem newval));

  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ xchgl(val_reg, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10957
// 64-bit get-and-set: xchgq swaps newval with the memory operand, so newval
// is both the value to store and the result register.
// The format mnemonic is XCHGQ so the printed text agrees with the emitted
// 64-bit xchgq (the 32-bit rule xchgI prints XCHGL).
instruct xchgL(memory mem, rRegL newval)
%{
  match(Set newval (GetAndSetL mem newval));

  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ xchgq(val_reg, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10966
// Pointer get-and-set, selected only when the node has no barrier data.
// xchgq swaps newval with the memory operand; newval is input and result.
instruct xchgP(memory mem, rRegP newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetP mem newval));

  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ xchgq(val_reg, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10976
// Narrow-oop get-and-set, selected only when the node has no barrier data.
// xchgl swaps newval with the memory operand; newval is input and result.
// The format text brackets the memory operand as [mem], like the other xchg rules.
instruct xchgN(memory mem, rRegN newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetN mem newval));

  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    Register val_reg = $newval$$Register;
    __ xchgl(val_reg, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10986
10987 //----------Abs Instructions-------------------------------------------
10988
// Integer absolute value.
// Computes dst = 0 - src, then conditionally moves src into dst on 'less'.
// dst is declared TEMP because it is zeroed before src is read.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);

  format %{ "xorl $dst, $dst\t# abs int\n\t"
            "subl $dst, $src\n\t"
            "cmovll $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ xorl(dst_reg, dst_reg);                      // dst = 0
    __ subl(dst_reg, src_reg);                      // dst = -src, sets flags
    __ cmovl(Assembler::less, dst_reg, src_reg);    // pick src when the subtract compares 'less'
  %}
  ins_pipe(ialu_reg_reg);
%}
11005
// Long absolute value.
// Computes dst = 0 - src (64-bit), then conditionally moves src into dst on 'less'.
// dst is declared TEMP because it is zeroed before src is read.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);

  format %{ "xorl $dst, $dst\t# abs long\n\t"
            "subq $dst, $src\n\t"
            "cmovlq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    // The 32-bit xorl form is what the original emits to clear dst.
    __ xorl(dst_reg, dst_reg);
    __ subq(dst_reg, src_reg);
    __ cmovq(Assembler::less, dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11022
11023 //----------Subtraction Instructions-------------------------------------------
11024
11025 // Integer Subtraction Instructions
// In-place 32-bit register subtract (dst -= src); selected only when UseAPX is off.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ subl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11039
// Three-operand 32-bit subtract (dst = src1 - src2) via esubl; selected only
// when UseAPX is on. Flagged as demotable on operand 1.
instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ esubl(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11053
// Three-operand 32-bit subtract of an immediate (dst = src1 - imm) via esubl;
// selected only when UseAPX is on. Flagged as demotable on operand 1.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ esubl(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11067
// 32-bit subtract with the load folded in: dst -= [src].
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ subl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11081
// 32-bit read-modify-write subtract: matches load, subtract and store on the
// same address and emits a single memory-destination subl.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ subl($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
11095
// In-place 64-bit register subtract (dst -= src); selected only when UseAPX is off.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ subq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11109
// Three-operand 64-bit subtract (dst = src1 - src2) via esubq; selected only
// when UseAPX is on. Flagged as demotable on operand 1.
instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ esubq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11123
// Three-operand 64-bit subtract of an immL32 immediate (dst = src1 - imm) via
// esubq; selected only when UseAPX is on. Flagged as demotable on operand 1.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ esubq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11137
// 64-bit subtract with the load folded in: dst -= [src].
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ subq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11151
// 64-bit read-modify-write subtract: matches load, subtract and store on the
// same address and emits a single memory-destination subq.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ subq($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
11165
11166 // Subtract from a pointer
11167 // XXX hmpf???
// Matches a pointer plus a negated int, AddP dst (SubI 0 src), and emits it
// as a single 64-bit subq of the int register from the pointer register.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq $dst, $src\t# ptr - int" %}
  ins_encode %{
    Register ptr_reg = $dst$$Register;
    Register off_reg = $src$$Register;
    __ subq(ptr_reg, off_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11179
// In-place 32-bit negate, matched from the (0 - dst) form; selected only when
// UseAPX is off. The flag list omits the carry flag, unlike the sub rules.
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ negl(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
11193
// Two-register 32-bit negate (dst = 0 - src) via enegl; selected only when
// UseAPX is on. Flagged as demotable on operand 2 (src follows zero in the match).
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enegl(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11207
// In-place 32-bit negate, matched from the NegI node form; selected only when
// UseAPX is off.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ negl(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
11221
// Two-register 32-bit negate of a NegI node (dst = -src) via enegl; selected
// only when UseAPX is on. Flagged as demotable on operand 1.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enegl(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11235
// 32-bit read-modify-write negate: matches load, (0 - value) and store on the
// same address and emits a single memory-operand negl.
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    // Negate the memory operand in place.
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11248
// In-place 64-bit negate, matched from the (0 - dst) form; selected only when
// UseAPX is off.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ negq(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
11262
// Two-register 64-bit negate (dst = 0 - src) via enegq; selected only when
// UseAPX is on. Flagged as demotable on operand 2 (src follows zero in the match).
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enegq(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11276
// In-place 64-bit negate, matched from the NegL node form; selected only when
// UseAPX is off.
// The format annotation reads "# long" because this rule operates on an rRegL
// with negq, matching the annotation used by negL_rReg and negL_mem.
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ negq(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
11290
// Two-register 64-bit negate of a NegL node (dst = -src) via enegq; selected
// only when UseAPX is on. Flagged as demotable on operand 1.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enegq(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11304
// 64-bit read-modify-write negate: matches load, (0 - value) and store on the
// same address and emits a single memory-operand negq.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    // Negate the memory operand in place.
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11317
11318 //----------Multiplication/Division Instructions-------------------------------
11319 // Integer Multiplication Instructions
11320 // Multiply Register
11321
// In-place 32-bit register multiply (dst *= src); selected only when UseAPX is off.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ imull(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11335
// Three-operand 32-bit multiply (dst = src1 * src2) via eimull; selected only
// when UseAPX is on. Flagged as demotable on either source operand.
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ eimull(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11350
// 32-bit multiply of a register by an immediate into a separate destination
// (dst = src * imm). No UseAPX predicate is attached to this rule.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ imull(dst_reg, src_reg, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11363
// 32-bit multiply with the load folded in: dst *= [src].
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ imull(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11376
// 32-bit multiply of a loaded value by an immediate: dst = [src] * imm.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ imull(dst_reg, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11389
// MulAddS2I as an expansion: dst = dst * src1 + src2 * src3.
// The second product is accumulated in src2, hence the KILL src2 effect.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{
    mulI_rReg(dst, src1, cr);
    mulI_rReg(src2, src3, cr);
    addI_rReg(dst, src2, cr);
  %}
%}
11399
// In-place 64-bit register multiply (dst *= src); selected only when UseAPX is off.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ imulq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11413
// Three-operand 64-bit multiply (dst = src1 * src2) via eimulq; selected only
// when UseAPX is on. Flagged as demotable on either source operand.
instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ eimulq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11428
// 64-bit multiply of a register by an immL32 immediate into a separate
// destination (dst = src * imm). No UseAPX predicate is attached to this rule.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ imulq(dst_reg, src_reg, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11441
// 64-bit multiply with the load folded in: dst *= [src].
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ imulq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11454
11455
// 64-bit multiply of a loaded value by an immL32 immediate: dst = [src] * imm.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ imulq(dst_reg, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11468
// Signed 64x64 multiply for a MulHiLoL node, accepted with its inputs in
// either order. Emits the one-operand imulq; the format text describes the
// product as landing in RDX:RAX.
instruct mulHiLoL_rReg(rax_RegL rax, rdx_RegL rdx, rRegL src, rFlagsReg cr)
%{
  match(MulHiLoL src rax);
  match(MulHiLoL rax src);
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq RDX:RAX, RAX, $src\t# mulhilo" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ imulq(src_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11482
// Unsigned 64x64 multiply for a UMulHiLoL node, accepted with its inputs in
// either order. Emits the one-operand mulq; the format text describes the
// product as landing in RDX:RAX.
instruct umulHiLoL_rReg(rax_RegL rax, rdx_RegL rdx, rRegL src, rFlagsReg cr)
%{
  match(UMulHiLoL src rax);
  match(UMulHiLoL rax src);
  effect(KILL cr);

  ins_cost(300);
  format %{ "mulq RDX:RAX, RAX, $src\t# umulhilo" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ mulq(src_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11496
// High half of a signed 64x64 multiply. The result is bound to rdx (rdx_RegL)
// and rax is consumed and clobbered (USE_KILL rax).
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ imulq(src_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11509
// High half of an unsigned 64x64 multiply. The result is bound to rdx
// (rdx_RegL) and rax is consumed and clobbered (USE_KILL rax).
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ mulq(src_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11522
// Signed 32-bit divide: quotient in rax, rdx clobbered. The divisor class
// excludes rax and rdx. The format text shows a guard that skips idivl when
// rax is 0x80000000 and the divisor is -1; the actual code comes from the
// cdql_enc encoding class, which is defined outside this block.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11541
// Signed 64-bit divide: quotient in rax, rdx clobbered. The divisor class
// excludes rax and rdx. The format text shows a guard that skips idivq when
// rax is 0x8000000000000000 and the divisor is -1; the actual code comes from
// the cdqq_enc encoding class, which is defined outside this block.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11561
// Unsigned 32-bit divide: quotient in rax, rdx clobbered. Delegates to the
// udivI macro-assembler helper, passing rax, the divisor and rdx.
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    Register rax_reg = $rax$$Register;
    Register div_reg = $div$$Register;
    Register rdx_reg = $rdx$$Register;
    __ udivI(rax_reg, div_reg, rdx_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11574
// Unsigned 64-bit divide: quotient in rax, rdx clobbered. Delegates to the
// udivL macro-assembler helper, passing rax, the divisor and rdx.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
    Register rax_reg = $rax$$Register;
    Register div_reg = $div$$Register;
    Register rdx_reg = $rdx$$Register;
    __ udivL(rax_reg, div_reg, rdx_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11587
// Signed 32-bit DIVMOD with a register divisor: one node yields both quotient
// and remainder. Only the flags are killed; rdx is not listed as a KILL here,
// unlike divI_rReg. Code comes from the cdql_enc encoding class, which is
// defined outside this block.
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}
11607
// Signed 64-bit DIVMOD with a register divisor: one node yields both quotient
// and remainder. Only the flags are killed; rdx is not listed as a KILL here,
// unlike divL_rReg. Code comes from the cdqq_enc encoding class, which is
// defined outside this block.
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}
11628
// Unsigned 32-bit DIVMOD with a register divisor: one node yields both
// quotient and remainder. Needs a scratch register (TEMP tmp) drawn from the
// class that excludes rax and rdx; delegates to the udivmodI helper.
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
         %}
  ins_encode %{
    Register rax_reg = $rax$$Register;
    Register div_reg = $div$$Register;
    Register rdx_reg = $rdx$$Register;
    Register tmp_reg = $tmp$$Register;
    __ udivmodI(rax_reg, div_reg, rdx_reg, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
11645
// Unsigned 64-bit DIVMOD with a register divisor: one node yields both
// quotient and remainder. Needs a scratch register (TEMP tmp) drawn from the
// class that excludes rax and rdx; delegates to the udivmodL helper.
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
         %}
  ins_encode %{
    Register rax_reg = $rax$$Register;
    Register div_reg = $div$$Register;
    Register rdx_reg = $rdx$$Register;
    Register tmp_reg = $tmp$$Register;
    __ udivmodL(rax_reg, div_reg, rdx_reg, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
11662
// Signed 32-bit remainder: matches ModI and defines rdx as the result.
// The dividend operand rax and the flags operand are both declared killed.
11663 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11664 rFlagsReg cr)
11665 %{
11666 match(Set rdx (ModI rax div));
11667 effect(KILL rax, KILL cr);
11668
11669 ins_cost(300); // XXX
// The format text documents the intended sequence: if rax equals 0x80000000
// and $div is -1, rdx is zeroed and the idivl is skipped; every other input
// goes through cdql + idivl.
11670 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11671 "jne,s normal\n\t"
11672 "xorl rdx, rdx\n\t"
11673 "cmpl $div, -1\n\t"
11674 "je,s done\n"
11675 "normal: cdql\n\t"
11676 "idivl $div\n"
11677 "done:" %}
// NOTE(review): cdql_enc is defined outside this chunk; presumably it emits
// the sequence shown in the format text -- confirm.
11678 ins_encode(cdql_enc(div));
11679 ins_pipe(ialu_reg_reg_alu0);
11680 %}
11681
// Signed 64-bit remainder: matches ModL and defines rdx as the result.
// The dividend operand rax and the flags operand are both declared killed.
11682 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11683 rFlagsReg cr)
11684 %{
11685 match(Set rdx (ModL rax div));
11686 effect(KILL rax, KILL cr);
11687
11688 ins_cost(300); // XXX
// The format text documents the intended sequence: if rax equals
// 0x8000000000000000 and $div is -1, rdx is zeroed and the idivq is skipped;
// every other input goes through cdqq + idivq.
11689 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11690 "cmpq rax, rdx\n\t"
11691 "jne,s normal\n\t"
11692 "xorl rdx, rdx\n\t"
11693 "cmpq $div, -1\n\t"
11694 "je,s done\n"
11695 "normal: cdqq\n\t"
11696 "idivq $div\n"
11697 "done:" %}
// NOTE(review): cdqq_enc is defined outside this chunk; presumably it emits
// the sequence shown in the format text -- confirm.
11698 ins_encode(cdqq_enc(div));
11699 ins_pipe(ialu_reg_reg_alu0);
11700 %}
11701
// Unsigned 32-bit remainder: matches UModI and defines rdx as the result.
// Code generation is delegated to the assembler routine umodI(rax, div, rdx).
// The dividend operand rax and the flags operand are both declared killed.
11702 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11703 %{
11704 match(Set rdx (UModI rax div));
11705 effect(KILL rax, KILL cr);
11706
11707 ins_cost(300);
11708 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11709 ins_encode %{
11710 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11711 %}
11712 ins_pipe(ialu_reg_reg_alu0);
11713 %}
11714
// Unsigned 64-bit remainder: matches UModL and defines rdx as the result.
// Code generation is delegated to the assembler routine umodL(rax, div, rdx).
// The dividend operand rax and the flags operand are both declared killed.
11715 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11716 %{
11717 match(Set rdx (UModL rax div));
11718 effect(KILL rax, KILL cr);
11719
11720 ins_cost(300);
11721 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11722 ins_encode %{
11723 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11724 %}
11725 ins_pipe(ialu_reg_reg_alu0);
11726 %}
11727
11728 // Integer Shift Instructions
11729 // Shift Left by one, two, three
// Int shift-left by an immI2 constant in the two-operand form: dst is both
// the value shifted and the result. Selected only when UseAPX is off; the
// flags operand is declared killed.
11730 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11731 %{
11732 predicate(!UseAPX);
11733 match(Set dst (LShiftI dst shift));
11734 effect(KILL cr);
11735
11736 format %{ "sall $dst, $shift" %}
11737 ins_encode %{
11738 __ sall($dst$$Register, $shift$$constant);
11739 %}
11740 ins_pipe(ialu_reg);
11741 %}
11742
11743 // Shift Left by one, two, three
// Int shift-left by an immI2 constant in the three-operand (ndd) form: the
// source register is separate from dst. Selected only when UseAPX is on; the
// flags operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// esall are defined outside this chunk. Presumably the flag permits demotion
// to the two-operand encoding when dst and src coincide, and `false` selects
// the flag-writing form (consistent with KILL cr) -- confirm.
11744 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11745 %{
11746 predicate(UseAPX);
11747 match(Set dst (LShiftI src shift));
11748 effect(KILL cr);
11749 flag(PD::Flag_ndd_demotable_opr1);
11750
11751 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11752 ins_encode %{
11753 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11754 %}
11755 ins_pipe(ialu_reg);
11756 %}
11757
11758 // Shift Left by 8-bit immediate
// Int shift-left by an immI8 constant in the two-operand form: dst is both
// the value shifted and the result. Selected only when UseAPX is off; the
// flags operand is declared killed.
11759 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11760 %{
11761 predicate(!UseAPX);
11762 match(Set dst (LShiftI dst shift));
11763 effect(KILL cr);
11764
11765 format %{ "sall $dst, $shift" %}
11766 ins_encode %{
11767 __ sall($dst$$Register, $shift$$constant);
11768 %}
11769 ins_pipe(ialu_reg);
11770 %}
11771
11772 // Shift Left by 8-bit immediate
// Int shift-left by an immI8 constant in the three-operand (ndd) form: the
// source register is separate from dst. Selected only when UseAPX is on; the
// flags operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// esall are defined outside this chunk; their exact meaning should be
// confirmed there (presumably demotion to the two-operand form and the
// flag-writing encoding, respectively).
11773 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11774 %{
11775 predicate(UseAPX);
11776 match(Set dst (LShiftI src shift));
11777 effect(KILL cr);
11778 flag(PD::Flag_ndd_demotable_opr1);
11779
11780 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11781 ins_encode %{
11782 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11783 %}
11784 ins_pipe(ialu_reg);
11785 %}
11786
11787 // Shift Left by 8-bit immediate
// Int shift-left of a memory operand by an immI8 constant. Matches the
// load / shift / store-back pattern on the same address and encodes it as a
// single sall on $dst$$Address. No UseAPX predicate is attached; the flags
// operand is declared killed.
11788 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11789 %{
11790 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11791 effect(KILL cr);
11792
11793 format %{ "sall $dst, $shift" %}
11794 ins_encode %{
11795 __ sall($dst$$Address, $shift$$constant);
11796 %}
11797 ins_pipe(ialu_mem_imm);
11798 %}
11799
11800 // Shift Left by variable
// Int shift-left by a variable count, used when BMI2 is not available.
// The count operand uses the rcx_RegI class and is not passed to the
// encoder: sall($dst) takes its count implicitly (presumably from CL, as the
// rule name suggests -- confirm). The flags operand is declared killed.
11801 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11802 %{
11803 predicate(!VM_Version::supports_bmi2());
11804 match(Set dst (LShiftI dst shift));
11805 effect(KILL cr);
11806
11807 format %{ "sall $dst, $shift" %}
11808 ins_encode %{
11809 __ sall($dst$$Register);
11810 %}
11811 ins_pipe(ialu_reg_reg);
11812 %}
11813
11814 // Shift Left by variable
// Int shift-left of a memory operand by a variable count, used when BMI2 is
// not available. Matches the load / shift / store-back pattern on the same
// address. The count operand uses the rcx_RegI class and is implicit in the
// encoding (only the address is passed). The flags operand is declared
// killed.
11815 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11816 %{
11817 predicate(!VM_Version::supports_bmi2());
11818 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11819 effect(KILL cr);
11820
11821 format %{ "sall $dst, $shift" %}
11822 ins_encode %{
11823 __ sall($dst$$Address);
11824 %}
11825 ins_pipe(ialu_mem_reg);
11826 %}
11827
// Int shift-left by a variable count using the three-operand shlxl form,
// selected when BMI2 is available. The count may be in any rRegI register
// and no flags operand is declared, so this rule has no flags effect.
11828 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11829 %{
11830 predicate(VM_Version::supports_bmi2());
11831 match(Set dst (LShiftI src shift));
11832
11833 format %{ "shlxl $dst, $src, $shift" %}
11834 ins_encode %{
11835 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11836 %}
11837 ins_pipe(ialu_reg_reg);
11838 %}
11839
// Int shift-left by a variable count with the shifted value loaded straight
// from memory (LoadI folded into shlxl), selected when BMI2 is available.
// No flags operand is declared. Carries an explicit ins_cost of 175.
11840 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11841 %{
11842 predicate(VM_Version::supports_bmi2());
11843 match(Set dst (LShiftI (LoadI src) shift));
11844 ins_cost(175);
11845 format %{ "shlxl $dst, $src, $shift" %}
11846 ins_encode %{
11847 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11848 %}
11849 ins_pipe(ialu_reg_mem);
11850 %}
11851
11852 // Arithmetic Shift Right by 8-bit immediate
// Int arithmetic shift-right by an immI8 constant in the two-operand form:
// dst is both the value shifted and the result. Selected only when UseAPX is
// off; the flags operand is declared killed.
11853 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11854 %{
11855 predicate(!UseAPX);
11856 match(Set dst (RShiftI dst shift));
11857 effect(KILL cr);
11858
11859 format %{ "sarl $dst, $shift" %}
11860 ins_encode %{
11861 __ sarl($dst$$Register, $shift$$constant);
11862 %}
// NOTE(review): this register rule uses the ialu_mem_imm pipeline class,
// whereas the sibling register shift-by-immediate rules (salI_rReg_imm,
// shrI_rReg_imm) use ialu_reg -- confirm whether this is intentional.
11863 ins_pipe(ialu_mem_imm);
11864 %}
11865
11866 // Arithmetic Shift Right by 8-bit immediate
// Int arithmetic shift-right by an immI8 constant in the three-operand (ndd)
// form: the source register is separate from dst. Selected only when UseAPX
// is on; the flags operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// esarl are defined outside this chunk; confirm their meaning there.
11867 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11868 %{
11869 predicate(UseAPX);
11870 match(Set dst (RShiftI src shift));
11871 effect(KILL cr);
11872 flag(PD::Flag_ndd_demotable_opr1);
11873
11874 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11875 ins_encode %{
11876 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11877 %}
// NOTE(review): uses the ialu_mem_imm pipeline class although all operands
// are registers or immediates; sibling ndd shift rules use ialu_reg --
// confirm whether this is intentional.
11878 ins_pipe(ialu_mem_imm);
11879 %}
11880
11881 // Arithmetic Shift Right by 8-bit immediate
// Int arithmetic shift-right of a memory operand by an immI8 constant.
// Matches the load / shift / store-back pattern on the same address and
// encodes it as a single sarl on $dst$$Address. The flags operand is
// declared killed.
11882 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11883 %{
11884 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11885 effect(KILL cr);
11886
11887 format %{ "sarl $dst, $shift" %}
11888 ins_encode %{
11889 __ sarl($dst$$Address, $shift$$constant);
11890 %}
11891 ins_pipe(ialu_mem_imm);
11892 %}
11893
11894 // Arithmetic Shift Right by variable
// Int arithmetic shift-right by a variable count, used when BMI2 is not
// available. The count operand uses the rcx_RegI class and is implicit in
// the encoding (only $dst is passed to sarl). The flags operand is declared
// killed.
11895 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11896 %{
11897 predicate(!VM_Version::supports_bmi2());
11898 match(Set dst (RShiftI dst shift));
11899 effect(KILL cr);
11900
11901 format %{ "sarl $dst, $shift" %}
11902 ins_encode %{
11903 __ sarl($dst$$Register);
11904 %}
11905 ins_pipe(ialu_reg_reg);
11906 %}
11907
11908 // Arithmetic Shift Right by variable
// Int arithmetic shift-right of a memory operand by a variable count, used
// when BMI2 is not available. Matches the load / shift / store-back pattern
// on the same address; the rcx_RegI count is implicit in the encoding. The
// flags operand is declared killed.
11909 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11910 %{
11911 predicate(!VM_Version::supports_bmi2());
11912 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11913 effect(KILL cr);
11914
11915 format %{ "sarl $dst, $shift" %}
11916 ins_encode %{
11917 __ sarl($dst$$Address);
11918 %}
11919 ins_pipe(ialu_mem_reg);
11920 %}
11921
// Int arithmetic shift-right by a variable count using the three-operand
// sarxl form, selected when BMI2 is available. The count may be in any rRegI
// register and no flags operand is declared.
11922 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11923 %{
11924 predicate(VM_Version::supports_bmi2());
11925 match(Set dst (RShiftI src shift));
11926
11927 format %{ "sarxl $dst, $src, $shift" %}
11928 ins_encode %{
11929 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11930 %}
11931 ins_pipe(ialu_reg_reg);
11932 %}
11933
// Int arithmetic shift-right by a variable count with the shifted value
// loaded straight from memory (LoadI folded into sarxl), selected when BMI2
// is available. No flags operand is declared. Explicit ins_cost of 175.
11934 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11935 %{
11936 predicate(VM_Version::supports_bmi2());
11937 match(Set dst (RShiftI (LoadI src) shift));
11938 ins_cost(175);
11939 format %{ "sarxl $dst, $src, $shift" %}
11940 ins_encode %{
11941 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11942 %}
11943 ins_pipe(ialu_reg_mem);
11944 %}
11945
11946 // Logical Shift Right by 8-bit immediate
// Int logical shift-right by an immI8 constant in the two-operand form: dst
// is both the value shifted and the result. Selected only when UseAPX is
// off; the flags operand is declared killed.
11947 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11948 %{
11949 predicate(!UseAPX);
11950 match(Set dst (URShiftI dst shift));
11951 effect(KILL cr);
11952
11953 format %{ "shrl $dst, $shift" %}
11954 ins_encode %{
11955 __ shrl($dst$$Register, $shift$$constant);
11956 %}
11957 ins_pipe(ialu_reg);
11958 %}
11959
11960 // Logical Shift Right by 8-bit immediate
// Int logical shift-right by an immI8 constant in the three-operand (ndd)
// form: the source register is separate from dst. Selected only when UseAPX
// is on; the flags operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// eshrl are defined outside this chunk; confirm their meaning there.
11961 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11962 %{
11963 predicate(UseAPX);
11964 match(Set dst (URShiftI src shift));
11965 effect(KILL cr);
11966 flag(PD::Flag_ndd_demotable_opr1);
11967
11968 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
11969 ins_encode %{
11970 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
11971 %}
11972 ins_pipe(ialu_reg);
11973 %}
11974
11975 // Logical Shift Right by 8-bit immediate
// Int logical shift-right of a memory operand by an immI8 constant. Matches
// the load / shift / store-back pattern on the same address and encodes it
// as a single shrl on $dst$$Address. The flags operand is declared killed.
11976 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11977 %{
11978 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
11979 effect(KILL cr);
11980
11981 format %{ "shrl $dst, $shift" %}
11982 ins_encode %{
11983 __ shrl($dst$$Address, $shift$$constant);
11984 %}
11985 ins_pipe(ialu_mem_imm);
11986 %}
11987
11988 // Logical Shift Right by variable
// Int logical shift-right by a variable count, used when BMI2 is not
// available. The count operand uses the rcx_RegI class and is implicit in
// the encoding (only $dst is passed to shrl). The flags operand is declared
// killed.
11989 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11990 %{
11991 predicate(!VM_Version::supports_bmi2());
11992 match(Set dst (URShiftI dst shift));
11993 effect(KILL cr);
11994
11995 format %{ "shrl $dst, $shift" %}
11996 ins_encode %{
11997 __ shrl($dst$$Register);
11998 %}
11999 ins_pipe(ialu_reg_reg);
12000 %}
12001
12002 // Logical Shift Right by variable
// Int logical shift-right of a memory operand by a variable count, used when
// BMI2 is not available. Matches the load / shift / store-back pattern on
// the same address; the rcx_RegI count is implicit in the encoding. The
// flags operand is declared killed.
12003 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12004 %{
12005 predicate(!VM_Version::supports_bmi2());
12006 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12007 effect(KILL cr);
12008
12009 format %{ "shrl $dst, $shift" %}
12010 ins_encode %{
12011 __ shrl($dst$$Address);
12012 %}
12013 ins_pipe(ialu_mem_reg);
12014 %}
12015
// Int logical shift-right by a variable count using the three-operand shrxl
// form, selected when BMI2 is available. The count may be in any rRegI
// register and no flags operand is declared.
12016 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12017 %{
12018 predicate(VM_Version::supports_bmi2());
12019 match(Set dst (URShiftI src shift));
12020
12021 format %{ "shrxl $dst, $src, $shift" %}
12022 ins_encode %{
12023 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12024 %}
12025 ins_pipe(ialu_reg_reg);
12026 %}
12027
// Int logical shift-right by a variable count with the shifted value loaded
// straight from memory (LoadI folded into shrxl), selected when BMI2 is
// available. No flags operand is declared. Explicit ins_cost of 175.
12028 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12029 %{
12030 predicate(VM_Version::supports_bmi2());
12031 match(Set dst (URShiftI (LoadI src) shift));
12032 ins_cost(175);
12033 format %{ "shrxl $dst, $src, $shift" %}
12034 ins_encode %{
12035 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12036 %}
12037 ins_pipe(ialu_reg_mem);
12038 %}
12039
12040 // Long Shift Instructions
12041 // Shift Left by one, two, three
// Long shift-left by an immI2 constant in the two-operand form: dst is both
// the value shifted and the result. Selected only when UseAPX is off; the
// flags operand is declared killed.
12042 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12043 %{
12044 predicate(!UseAPX);
12045 match(Set dst (LShiftL dst shift));
12046 effect(KILL cr);
12047
12048 format %{ "salq $dst, $shift" %}
12049 ins_encode %{
12050 __ salq($dst$$Register, $shift$$constant);
12051 %}
12052 ins_pipe(ialu_reg);
12053 %}
12054
12055 // Shift Left by one, two, three
// Long shift-left by an immI2 constant in the three-operand (ndd) form: the
// source register is separate from dst. Selected only when UseAPX is on; the
// flags operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// esalq are defined outside this chunk; confirm their meaning there.
12056 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12057 %{
12058 predicate(UseAPX);
12059 match(Set dst (LShiftL src shift));
12060 effect(KILL cr);
12061 flag(PD::Flag_ndd_demotable_opr1);
12062
12063 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12064 ins_encode %{
12065 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12066 %}
12067 ins_pipe(ialu_reg);
12068 %}
12069
12070 // Shift Left by 8-bit immediate
// Long shift-left by an immI8 constant in the two-operand form: dst is both
// the value shifted and the result. Selected only when UseAPX is off; the
// flags operand is declared killed.
12071 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12072 %{
12073 predicate(!UseAPX);
12074 match(Set dst (LShiftL dst shift));
12075 effect(KILL cr);
12076
12077 format %{ "salq $dst, $shift" %}
12078 ins_encode %{
12079 __ salq($dst$$Register, $shift$$constant);
12080 %}
12081 ins_pipe(ialu_reg);
12082 %}
12083
12084 // Shift Left by 8-bit immediate
// Long shift-left by an immI8 constant in the three-operand (ndd) form: the
// source register is separate from dst. Selected only when UseAPX is on; the
// flags operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// esalq are defined outside this chunk; confirm their meaning there.
12085 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12086 %{
12087 predicate(UseAPX);
12088 match(Set dst (LShiftL src shift));
12089 effect(KILL cr);
12090 flag(PD::Flag_ndd_demotable_opr1);
12091
12092 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12093 ins_encode %{
12094 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12095 %}
12096 ins_pipe(ialu_reg);
12097 %}
12098
12099 // Shift Left by 8-bit immediate
// Long shift-left of a memory operand by an immI8 constant. Matches the
// load / shift / store-back pattern on the same address and encodes it as a
// single salq on $dst$$Address. The flags operand is declared killed.
12100 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12101 %{
12102 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12103 effect(KILL cr);
12104
12105 format %{ "salq $dst, $shift" %}
12106 ins_encode %{
12107 __ salq($dst$$Address, $shift$$constant);
12108 %}
12109 ins_pipe(ialu_mem_imm);
12110 %}
12111
12112 // Shift Left by variable
// Long shift-left by a variable count, used when BMI2 is not available.
// The count is an int operand of the rcx_RegI class and is implicit in the
// encoding (only $dst is passed to salq). The flags operand is declared
// killed.
12113 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12114 %{
12115 predicate(!VM_Version::supports_bmi2());
12116 match(Set dst (LShiftL dst shift));
12117 effect(KILL cr);
12118
12119 format %{ "salq $dst, $shift" %}
12120 ins_encode %{
12121 __ salq($dst$$Register);
12122 %}
12123 ins_pipe(ialu_reg_reg);
12124 %}
12125
12126 // Shift Left by variable
// Long shift-left of a memory operand by a variable count, used when BMI2 is
// not available. Matches the load / shift / store-back pattern on the same
// address; the rcx_RegI count is implicit in the encoding. The flags operand
// is declared killed.
12127 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12128 %{
12129 predicate(!VM_Version::supports_bmi2());
12130 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12131 effect(KILL cr);
12132
12133 format %{ "salq $dst, $shift" %}
12134 ins_encode %{
12135 __ salq($dst$$Address);
12136 %}
12137 ins_pipe(ialu_mem_reg);
12138 %}
12139
// Long shift-left by a variable count using the three-operand shlxq form,
// selected when BMI2 is available. The count is an int operand in any rRegI
// register and no flags operand is declared.
12140 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12141 %{
12142 predicate(VM_Version::supports_bmi2());
12143 match(Set dst (LShiftL src shift));
12144
12145 format %{ "shlxq $dst, $src, $shift" %}
12146 ins_encode %{
12147 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12148 %}
12149 ins_pipe(ialu_reg_reg);
12150 %}
12151
// Long shift-left by a variable count with the shifted value loaded straight
// from memory (LoadL folded into shlxq), selected when BMI2 is available.
// No flags operand is declared. Explicit ins_cost of 175.
12152 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12153 %{
12154 predicate(VM_Version::supports_bmi2());
12155 match(Set dst (LShiftL (LoadL src) shift));
12156 ins_cost(175);
12157 format %{ "shlxq $dst, $src, $shift" %}
12158 ins_encode %{
12159 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12160 %}
12161 ins_pipe(ialu_reg_mem);
12162 %}
12163
12164 // Arithmetic Shift Right by immediate (count masked to its low 6 bits)
// Long arithmetic shift-right by a constant in the two-operand form: dst is
// both the value shifted and the result. Unlike the sibling shift rules the
// count operand is a full immI, so the encoder masks it with 0x3F before
// passing it on. Selected only when UseAPX is off; the flags operand is
// declared killed.
12165 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12166 %{
12167 predicate(!UseAPX);
12168 match(Set dst (RShiftL dst shift));
12169 effect(KILL cr);
12170
12171 format %{ "sarq $dst, $shift" %}
12172 ins_encode %{
12173 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12174 %}
// NOTE(review): this register rule uses the ialu_mem_imm pipeline class,
// whereas e.g. shrL_rReg_imm uses ialu_reg -- confirm whether intentional.
12175 ins_pipe(ialu_mem_imm);
12176 %}
12177
12178 // Arithmetic Shift Right by immediate (count masked to its low 6 bits), NDD form
// Long arithmetic shift-right by a constant in the three-operand (ndd) form:
// the source register is separate from dst. The count operand is a full
// immI, so the encoder masks it with 0x3F before passing it on. Selected
// only when UseAPX is on; the flags operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// esarq are defined outside this chunk; confirm their meaning there.
12179 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12180 %{
12181 predicate(UseAPX);
12182 match(Set dst (RShiftL src shift));
12183 effect(KILL cr);
12184 flag(PD::Flag_ndd_demotable_opr1);
12185
12186 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12187 ins_encode %{
12188 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12189 %}
// NOTE(review): uses the ialu_mem_imm pipeline class although no memory
// operand is involved -- confirm whether intentional.
12190 ins_pipe(ialu_mem_imm);
12191 %}
12192
12193 // Arithmetic Shift Right of memory by immediate (count masked to its low 6 bits)
// Long arithmetic shift-right of a memory operand by a constant. Matches the
// load / shift / store-back pattern on the same address. The count operand
// is a full immI, so the encoder masks it with 0x3F before passing it on.
// The flags operand is declared killed.
12194 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12195 %{
12196 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12197 effect(KILL cr);
12198
12199 format %{ "sarq $dst, $shift" %}
12200 ins_encode %{
12201 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12202 %}
12203 ins_pipe(ialu_mem_imm);
12204 %}
12205
12206 // Arithmetic Shift Right by variable
// Long arithmetic shift-right by a variable count, used when BMI2 is not
// available. The count is an int operand of the rcx_RegI class and is
// implicit in the encoding (only $dst is passed to sarq). The flags operand
// is declared killed.
12207 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12208 %{
12209 predicate(!VM_Version::supports_bmi2());
12210 match(Set dst (RShiftL dst shift));
12211 effect(KILL cr);
12212
12213 format %{ "sarq $dst, $shift" %}
12214 ins_encode %{
12215 __ sarq($dst$$Register);
12216 %}
12217 ins_pipe(ialu_reg_reg);
12218 %}
12219
12220 // Arithmetic Shift Right by variable
// Long arithmetic shift-right of a memory operand by a variable count, used
// when BMI2 is not available. Matches the load / shift / store-back pattern
// on the same address; the rcx_RegI count is implicit in the encoding. The
// flags operand is declared killed.
12221 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12222 %{
12223 predicate(!VM_Version::supports_bmi2());
12224 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12225 effect(KILL cr);
12226
12227 format %{ "sarq $dst, $shift" %}
12228 ins_encode %{
12229 __ sarq($dst$$Address);
12230 %}
12231 ins_pipe(ialu_mem_reg);
12232 %}
12233
// Long arithmetic shift-right by a variable count using the three-operand
// sarxq form, selected when BMI2 is available. The count is an int operand
// in any rRegI register and no flags operand is declared.
12234 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12235 %{
12236 predicate(VM_Version::supports_bmi2());
12237 match(Set dst (RShiftL src shift));
12238
12239 format %{ "sarxq $dst, $src, $shift" %}
12240 ins_encode %{
12241 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12242 %}
12243 ins_pipe(ialu_reg_reg);
12244 %}
12245
// Long arithmetic shift-right by a variable count with the shifted value
// loaded straight from memory (LoadL folded into sarxq), selected when BMI2
// is available. No flags operand is declared. Explicit ins_cost of 175.
12246 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12247 %{
12248 predicate(VM_Version::supports_bmi2());
12249 match(Set dst (RShiftL (LoadL src) shift));
12250 ins_cost(175);
12251 format %{ "sarxq $dst, $src, $shift" %}
12252 ins_encode %{
12253 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12254 %}
12255 ins_pipe(ialu_reg_mem);
12256 %}
12257
12258 // Logical Shift Right by 8-bit immediate
// Long logical shift-right by an immI8 constant in the two-operand form: dst
// is both the value shifted and the result. Selected only when UseAPX is
// off; the flags operand is declared killed.
12259 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12260 %{
12261 predicate(!UseAPX);
12262 match(Set dst (URShiftL dst shift));
12263 effect(KILL cr);
12264
12265 format %{ "shrq $dst, $shift" %}
12266 ins_encode %{
12267 __ shrq($dst$$Register, $shift$$constant);
12268 %}
12269 ins_pipe(ialu_reg);
12270 %}
12271
12272 // Logical Shift Right by 8-bit immediate
// Long logical shift-right by an immI8 constant in the three-operand (ndd)
// form: the source register is separate from dst. Selected only when UseAPX
// is on; the flags operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// eshrq are defined outside this chunk; confirm their meaning there.
12273 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12274 %{
12275 predicate(UseAPX);
12276 match(Set dst (URShiftL src shift));
12277 effect(KILL cr);
12278 flag(PD::Flag_ndd_demotable_opr1);
12279
12280 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12281 ins_encode %{
12282 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12283 %}
12284 ins_pipe(ialu_reg);
12285 %}
12286
12287 // Logical Shift Right by 8-bit immediate
// Long logical shift-right of a memory operand by an immI8 constant. Matches
// the load / shift / store-back pattern on the same address and encodes it
// as a single shrq on $dst$$Address. The flags operand is declared killed.
12288 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12289 %{
12290 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12291 effect(KILL cr);
12292
12293 format %{ "shrq $dst, $shift" %}
12294 ins_encode %{
12295 __ shrq($dst$$Address, $shift$$constant);
12296 %}
12297 ins_pipe(ialu_mem_imm);
12298 %}
12299
12300 // Logical Shift Right by variable
// Long logical shift-right by a variable count, used when BMI2 is not
// available. The count is an int operand of the rcx_RegI class and is
// implicit in the encoding (only $dst is passed to shrq). The flags operand
// is declared killed.
12301 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12302 %{
12303 predicate(!VM_Version::supports_bmi2());
12304 match(Set dst (URShiftL dst shift));
12305 effect(KILL cr);
12306
12307 format %{ "shrq $dst, $shift" %}
12308 ins_encode %{
12309 __ shrq($dst$$Register);
12310 %}
12311 ins_pipe(ialu_reg_reg);
12312 %}
12313
12314 // Logical Shift Right by variable
// Long logical shift-right of a memory operand by a variable count, used
// when BMI2 is not available. Matches the load / shift / store-back pattern
// on the same address; the rcx_RegI count is implicit in the encoding. The
// flags operand is declared killed.
12315 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12316 %{
12317 predicate(!VM_Version::supports_bmi2());
12318 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12319 effect(KILL cr);
12320
12321 format %{ "shrq $dst, $shift" %}
12322 ins_encode %{
12323 __ shrq($dst$$Address);
12324 %}
12325 ins_pipe(ialu_mem_reg);
12326 %}
12327
// Long logical shift-right by a variable count using the three-operand shrxq
// form, selected when BMI2 is available. The count is an int operand in any
// rRegI register and no flags operand is declared.
12328 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12329 %{
12330 predicate(VM_Version::supports_bmi2());
12331 match(Set dst (URShiftL src shift));
12332
12333 format %{ "shrxq $dst, $src, $shift" %}
12334 ins_encode %{
12335 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12336 %}
12337 ins_pipe(ialu_reg_reg);
12338 %}
12339
// Long logical shift-right by a variable count with the shifted value loaded
// straight from memory (LoadL folded into shrxq), selected when BMI2 is
// available. No flags operand is declared. Explicit ins_cost of 175.
12340 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12341 %{
12342 predicate(VM_Version::supports_bmi2());
12343 match(Set dst (URShiftL (LoadL src) shift));
12344 ins_cost(175);
12345 format %{ "shrxq $dst, $src, $shift" %}
12346 ins_encode %{
12347 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12348 %}
12349 ins_pipe(ialu_reg_mem);
12350 %}
12351
12352 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12353 // This idiom is used by the compiler for the i2b bytecode.
// Collapses the pattern (src << 24) >> 24 (arithmetic right shift), with
// both counts being the immI_24 operand, into a single movsbl from src to
// dst. No flags operand is declared.
12354 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12355 %{
12356 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12357
12358 format %{ "movsbl $dst, $src\t# i2b" %}
12359 ins_encode %{
12360 __ movsbl($dst$$Register, $src$$Register);
12361 %}
12362 ins_pipe(ialu_reg_reg);
12363 %}
12364
12365 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12366 // This idiom is used by the compiler for the i2s bytecode.
// Collapses the pattern (src << 16) >> 16 (arithmetic right shift), with
// both counts being the immI_16 operand, into a single movswl from src to
// dst. No flags operand is declared.
12367 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12368 %{
12369 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12370
12371 format %{ "movswl $dst, $src\t# i2s" %}
12372 ins_encode %{
12373 __ movswl($dst$$Register, $src$$Register);
12374 %}
12375 ins_pipe(ialu_reg_reg);
12376 %}
12377
12378 // ROL/ROR instructions
12379
12380 // Rotate left by constant.
// Int rotate-left by an immI8 constant in the two-operand form, used when
// BMI2 is not available. The predicate also requires the node's type to be
// T_INT, since the same RotateLeft ideal node is matched by the long rules
// below. The flags operand is declared killed.
12381 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12382 %{
12383 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12384 match(Set dst (RotateLeft dst shift));
12385 effect(KILL cr);
12386 format %{ "roll $dst, $shift" %}
12387 ins_encode %{
12388 __ roll($dst$$Register, $shift$$constant);
12389 %}
12390 ins_pipe(ialu_reg);
12391 %}
12392
// Int rotate-left by an immI8 constant when BMI2 is available and UseAPX is
// off. There is no rotate-left encoder call here: the rule emits rorxl with
// the complementary count 32 - (shift & 31). No flags operand is declared.
12393 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12394 %{
12395 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12396 match(Set dst (RotateLeft src shift));
12397 format %{ "rolxl $dst, $src, $shift" %}
12398 ins_encode %{
// NOTE(review): when (shift & 31) is 0 the value passed is 32, not 0;
// presumably the hardware reduces the count modulo 32 -- confirm.
12399 int shift = 32 - ($shift$$constant & 31);
12400 __ rorxl($dst$$Register, $src$$Register, shift);
12401 %}
12402 ins_pipe(ialu_reg_reg);
12403 %}
12404
// Int rotate-left by an immI8 constant with the rotated value loaded
// straight from memory (LoadI folded in), selected when BMI2 is available.
// Emitted as rorxl with the complementary count 32 - (shift & 31). Unlike
// the register-source rule rolI_immI8, the predicate has no UseAPX term.
// No flags operand is declared. Explicit ins_cost of 175.
12405 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12406 %{
12407 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12408 match(Set dst (RotateLeft (LoadI src) shift));
12409 ins_cost(175);
12410 format %{ "rolxl $dst, $src, $shift" %}
12411 ins_encode %{
12412 int shift = 32 - ($shift$$constant & 31);
12413 __ rorxl($dst$$Register, $src$$Address, shift);
12414 %}
12415 ins_pipe(ialu_reg_mem);
12416 %}
12417
12418 // Rotate Left by variable
// Int rotate-left by a variable count in the two-operand form, selected when
// UseAPX is off and the node's type is T_INT. The count operand uses the
// rcx_RegI class and is implicit in the encoding (only $dst is passed to
// roll). The flags operand is declared killed.
12419 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12420 %{
12421 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12422 match(Set dst (RotateLeft dst shift));
12423 effect(KILL cr);
12424 format %{ "roll $dst, $shift" %}
12425 ins_encode %{
12426 __ roll($dst$$Register);
12427 %}
12428 ins_pipe(ialu_reg_reg);
12429 %}
12430
12431 // Rotate Left by variable
// Int rotate-left by a variable count in the three-operand (ndd) form,
// selected when UseAPX is on and the node's type is T_INT. The count operand
// uses the rcx_RegI class and is implicit in the encoding. The flags operand
// is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// eroll are defined outside this chunk; confirm their meaning there.
12432 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12433 %{
12434 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12435 match(Set dst (RotateLeft src shift));
12436 effect(KILL cr);
12437 flag(PD::Flag_ndd_demotable_opr1);
12438
12439 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12440 ins_encode %{
12441 __ eroll($dst$$Register, $src$$Register, false);
12442 %}
12443 ins_pipe(ialu_reg_reg);
12444 %}
12445
12446 // Rotate Right by constant.
// Int rotate-right by an immI8 constant in the two-operand form, used when
// BMI2 is not available and the node's type is T_INT. The flags operand is
// declared killed.
12447 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12448 %{
12449 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12450 match(Set dst (RotateRight dst shift));
12451 effect(KILL cr);
12452 format %{ "rorl $dst, $shift" %}
12453 ins_encode %{
12454 __ rorl($dst$$Register, $shift$$constant);
12455 %}
12456 ins_pipe(ialu_reg);
12457 %}
12458
12459 // Rotate Right by constant.
// Int rotate-right by an immI8 constant using the three-operand rorxl form,
// selected when BMI2 is available, UseAPX is off and the node's type is
// T_INT. The constant is passed to the encoder unmodified. No flags operand
// is declared.
12460 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12461 %{
12462 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12463 match(Set dst (RotateRight src shift));
12464 format %{ "rorxl $dst, $src, $shift" %}
12465 ins_encode %{
12466 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12467 %}
12468 ins_pipe(ialu_reg_reg);
12469 %}
12470
// Int rotate-right by an immI8 constant with the rotated value loaded
// straight from memory (LoadI folded into rorxl), selected when BMI2 is
// available and the node's type is T_INT. The predicate has no UseAPX term.
// No flags operand is declared. Explicit ins_cost of 175.
12471 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12472 %{
12473 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12474 match(Set dst (RotateRight (LoadI src) shift));
12475 ins_cost(175);
12476 format %{ "rorxl $dst, $src, $shift" %}
12477 ins_encode %{
12478 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12479 %}
12480 ins_pipe(ialu_reg_mem);
12481 %}
12482
12483 // Rotate Right by variable
// Int rotate-right by a variable count in the two-operand form, selected
// when UseAPX is off and the node's type is T_INT. The count operand uses
// the rcx_RegI class and is implicit in the encoding (only $dst is passed to
// rorl). The flags operand is declared killed.
12484 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12485 %{
12486 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12487 match(Set dst (RotateRight dst shift));
12488 effect(KILL cr);
12489 format %{ "rorl $dst, $shift" %}
12490 ins_encode %{
12491 __ rorl($dst$$Register);
12492 %}
12493 ins_pipe(ialu_reg_reg);
12494 %}
12495
12496 // Rotate Right by variable
// Int rotate-right by a variable count in the three-operand (ndd) form,
// selected when UseAPX is on and the node's type is T_INT. The count operand
// uses the rcx_RegI class and is implicit in the encoding. The flags operand
// is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// erorl are defined outside this chunk; confirm their meaning there.
12497 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12498 %{
12499 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12500 match(Set dst (RotateRight src shift));
12501 effect(KILL cr);
12502 flag(PD::Flag_ndd_demotable_opr1);
12503
12504 format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
12505 ins_encode %{
12506 __ erorl($dst$$Register, $src$$Register, false);
12507 %}
12508 ins_pipe(ialu_reg_reg);
12509 %}
12510
12511 // Rotate Left by constant.
// Long rotate-left by an immI8 constant in the two-operand form, used when
// BMI2 is not available and the node's type is T_LONG. The flags operand is
// declared killed.
12512 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12513 %{
12514 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12515 match(Set dst (RotateLeft dst shift));
12516 effect(KILL cr);
12517 format %{ "rolq $dst, $shift" %}
12518 ins_encode %{
12519 __ rolq($dst$$Register, $shift$$constant);
12520 %}
12521 ins_pipe(ialu_reg);
12522 %}
12523
// Long rotate-left by an immI8 constant when BMI2 is available, UseAPX is
// off and the node's type is T_LONG. The rule emits rorxq with the
// complementary count 64 - (shift & 63). No flags operand is declared.
12524 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12525 %{
12526 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12527 match(Set dst (RotateLeft src shift));
12528 format %{ "rolxq $dst, $src, $shift" %}
12529 ins_encode %{
// NOTE(review): when (shift & 63) is 0 the value passed is 64, not 0;
// presumably the hardware reduces the count modulo 64 -- confirm.
12530 int shift = 64 - ($shift$$constant & 63);
12531 __ rorxq($dst$$Register, $src$$Register, shift);
12532 %}
12533 ins_pipe(ialu_reg_reg);
12534 %}
12535
// Long rotate-left by an immI8 constant with the rotated value loaded
// straight from memory (LoadL folded in), selected when BMI2 is available
// and the node's type is T_LONG. Emitted as rorxq with the complementary
// count 64 - (shift & 63). The predicate has no UseAPX term. No flags
// operand is declared. Explicit ins_cost of 175.
12536 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12537 %{
12538 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12539 match(Set dst (RotateLeft (LoadL src) shift));
12540 ins_cost(175);
12541 format %{ "rolxq $dst, $src, $shift" %}
12542 ins_encode %{
12543 int shift = 64 - ($shift$$constant & 63);
12544 __ rorxq($dst$$Register, $src$$Address, shift);
12545 %}
12546 ins_pipe(ialu_reg_mem);
12547 %}
12548
12549 // Rotate Left by variable
// Long rotate-left by a variable count in the two-operand form, selected
// when UseAPX is off and the node's type is T_LONG. The count is an int
// operand of the rcx_RegI class and is implicit in the encoding (only $dst
// is passed to rolq). The flags operand is declared killed.
12550 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12551 %{
12552 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12553 match(Set dst (RotateLeft dst shift));
12554 effect(KILL cr);
12555
12556 format %{ "rolq $dst, $shift" %}
12557 ins_encode %{
12558 __ rolq($dst$$Register);
12559 %}
12560 ins_pipe(ialu_reg_reg);
12561 %}
12562
12563 // Rotate Left by variable
// Long rotate-left by a variable count in the three-operand (ndd) form,
// selected when UseAPX is on and the node's type is T_LONG. The count
// operand uses the rcx_RegI class and is implicit in the encoding. The flags
// operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// erolq are defined outside this chunk; confirm their meaning there.
12564 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12565 %{
12566 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12567 match(Set dst (RotateLeft src shift));
12568 effect(KILL cr);
12569 flag(PD::Flag_ndd_demotable_opr1);
12570
12571 format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
12572 ins_encode %{
12573 __ erolq($dst$$Register, $src$$Register, false);
12574 %}
12575 ins_pipe(ialu_reg_reg);
12576 %}
12577
12578 // Rotate Right by constant.
// Long rotate-right by an immI8 constant in the two-operand form, used when
// BMI2 is not available and the node's type is T_LONG. The flags operand is
// declared killed.
12579 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12580 %{
12581 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12582 match(Set dst (RotateRight dst shift));
12583 effect(KILL cr);
12584 format %{ "rorq $dst, $shift" %}
12585 ins_encode %{
12586 __ rorq($dst$$Register, $shift$$constant);
12587 %}
12588 ins_pipe(ialu_reg);
12589 %}
12590
12591 // Rotate Right by constant
// Long rotate-right by an immI8 constant using the three-operand rorxq form.
// Selected when BMI2 is available, UseAPX is off and the node's type is
// T_LONG; the constant is passed to the encoder unmodified and no flags
// operand is declared.
// The !UseAPX term keeps this rule consistent with the other register-source
// BMI2 rotate-by-constant rules (rolI_immI8, rorI_immI8, rolL_immI8), which
// all carry it.
12592 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12593 %{
12594 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12595 match(Set dst (RotateRight src shift));
12596 format %{ "rorxq $dst, $src, $shift" %}
12597 ins_encode %{
12598 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12599 %}
12600 ins_pipe(ialu_reg_reg);
12601 %}
12602
// Long rotate-right by an immI8 constant with the rotated value loaded
// straight from memory (LoadL folded into rorxq), selected when BMI2 is
// available and the node's type is T_LONG. The predicate has no UseAPX term.
// No flags operand is declared. Explicit ins_cost of 175.
12603 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12604 %{
12605 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12606 match(Set dst (RotateRight (LoadL src) shift));
12607 ins_cost(175);
12608 format %{ "rorxq $dst, $src, $shift" %}
12609 ins_encode %{
12610 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12611 %}
12612 ins_pipe(ialu_reg_mem);
12613 %}
12614
12615 // Rotate Right by variable
// Long rotate-right by a variable count in the two-operand form, selected
// when UseAPX is off and the node's type is T_LONG. The count is an int
// operand of the rcx_RegI class and is implicit in the encoding (only $dst
// is passed to rorq). The flags operand is declared killed.
12616 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12617 %{
12618 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12619 match(Set dst (RotateRight dst shift));
12620 effect(KILL cr);
12621 format %{ "rorq $dst, $shift" %}
12622 ins_encode %{
12623 __ rorq($dst$$Register);
12624 %}
12625 ins_pipe(ialu_reg_reg);
12626 %}
12627
12628 // Rotate Right by variable
// Long rotate-right by a variable count in the three-operand (ndd) form,
// selected when UseAPX is on and the node's type is T_LONG. The count
// operand uses the rcx_RegI class and is implicit in the encoding. The flags
// operand is declared killed.
// NOTE(review): Flag_ndd_demotable_opr1 and the trailing `false` argument of
// erorq are defined outside this chunk; confirm their meaning there.
12629 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12630 %{
12631 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12632 match(Set dst (RotateRight src shift));
12633 effect(KILL cr);
12634 flag(PD::Flag_ndd_demotable_opr1);
12635
12636 format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
12637 ins_encode %{
12638 __ erorq($dst$$Register, $src$$Register, false);
12639 %}
12640 ins_pipe(ialu_reg_reg);
12641 %}
12642
12643 //----------------------------- CompressBits/ExpandBits ------------------------
12644
// Long CompressBits with both inputs in registers, encoded as pextq
// (parallel bit extract, per the format text). The predicate restricts the
// rule to nodes whose type is a long. No flags operand is declared.
// NOTE(review): no CPU-feature predicate appears on this rule; presumably
// availability of the instruction is checked elsewhere -- confirm.
12645 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12646 predicate(n->bottom_type()->isa_long());
12647 match(Set dst (CompressBits src mask));
12648 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12649 ins_encode %{
12650 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12651 %}
12652 ins_pipe( pipe_slow );
12653 %}
12654
// Long ExpandBits with both inputs in registers, encoded as pdepq (parallel
// bit deposit, per the format text). The predicate restricts the rule to
// nodes whose type is a long. No flags operand is declared.
// NOTE(review): no CPU-feature predicate appears on this rule; presumably
// availability of the instruction is checked elsewhere -- confirm.
12655 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12656 predicate(n->bottom_type()->isa_long());
12657 match(Set dst (ExpandBits src mask));
12658 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12659 ins_encode %{
12660 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12661 %}
12662 ins_pipe( pipe_slow );
12663 %}
12664
// Long CompressBits with the mask loaded straight from memory (LoadL folded
// into pextq). The predicate restricts the rule to nodes whose type is a
// long. No flags operand is declared.
12665 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12666 predicate(n->bottom_type()->isa_long());
12667 match(Set dst (CompressBits src (LoadL mask)));
12668 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12669 ins_encode %{
12670 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12671 %}
12672 ins_pipe( pipe_slow );
12673 %}
12674
// Long ExpandBits with the mask loaded straight from memory (LoadL folded
// into pdepq). The predicate restricts the rule to nodes whose type is a
// long. No flags operand is declared.
12675 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12676 predicate(n->bottom_type()->isa_long());
12677 match(Set dst (ExpandBits src (LoadL mask)));
12678 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12679 ins_encode %{
12680 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12681 %}
12682 ins_pipe( pipe_slow );
12683 %}
12684
12685
12686 // Logical Instructions
12687
12688 // Integer Logical Instructions
12689
12690 // And Instructions
12691 // And Register with Register
// Int bitwise AND of two registers in the two-operand form: dst is both an
// input and the result. Selected only when UseAPX is off; the flags operand
// is declared killed.
12692 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12693 %{
12694 predicate(!UseAPX);
12695 match(Set dst (AndI dst src));
12696 effect(KILL cr);
// The flag() list annotates the rule with PD::Flag_* markers which, going by
// their names, record that the instruction sets the sign, zero and parity
// flags and clears overflow and carry (consumers are outside this chunk).
12697 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12698
12699 format %{ "andl $dst, $src\t# int" %}
12700 ins_encode %{
12701 __ andl($dst$$Register, $src$$Register);
12702 %}
12703 ins_pipe(ialu_reg_reg);
12704 %}
12705
// Int AND, register with register, three-operand New Data Destination (NDD) form.
// Selected only when UseAPX is on; both inputs are flagged as NDD-demotable.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12721
// Int AND with the constant 255, implemented as a zero-extending byte move.
// No flags-register operand is declared for this rule.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask) %{
  match(Set dst (AndI src mask));

  format %{ "movzbl $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12733
// (int & 255) followed by ConvI2L, folded into one zero-extending byte move
// whose destination is a long register.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12745
// Int AND with the constant 65535, implemented as a zero-extending word move.
// No flags-register operand is declared for this rule.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask) %{
  match(Set dst (AndI src mask));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12757
// (int & 65535) followed by ConvI2L, folded into one zero-extending word move
// whose destination is a long register.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12769
// ConvI2L of (int & mask) where mask is an immI_Pow2M1 constant: the int2long
// conversion is skipped by using bzhiq with the bit count held in a TEMP register.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI src mask)));
  predicate(VM_Version::supports_bmi2());
  effect(TEMP tmp, KILL cr);
  ins_cost(125);

  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
  ins_encode %{
    // tmp = log2(mask + 1), i.e. the index passed to bzhiq.
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12784
// Int AND, register with immediate, two-operand form (dst is also the left input).
// Selected only when UseAPX is off.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12799
// Int AND, register with immediate, three-operand NDD form.
// Selected only when UseAPX is on; the register input (operand 1) is flagged NDD-demotable.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12813
// Int AND of a register with a value loaded from memory (LoadI folded into andl).
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12828
// Byte read-modify-write: LoadB / AndI with a register / StoreB on the same
// address, collapsed into a single andb with a memory destination.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andb $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
12843
// Int read-modify-write: LoadI / AndI with a register / StoreI on the same
// address, collapsed into a single andl with a memory destination.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
12857
// Int read-modify-write with an immediate: LoadI / AndI imm / StoreI on the
// same address, collapsed into a single andl with a memory destination.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12872
12873 // BMI1 instructions
// Int and-not with a memory operand: (src1 ^ -1) & LoadI(src2) emitted as andnl.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12888
// Int and-not, all-register form: (src1 ^ -1) & src2 emitted as andnl.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12902
// Int (0 - src) & src, register form, emitted as blsil.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12916
// Int (0 - x) & x where x is LoadI(src), emitted as blsil with a memory operand.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12931
// Int (x + -1) ^ x where x is LoadI(src), emitted as blsmskl with a memory operand.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12947
// Int (src + -1) ^ src, register form, emitted as blsmskl.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12963
// Int (src + -1) & src, register form, emitted as blsrl.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-only operands: use the same pipe class as the other reg/reg
  // BMI1 rules (blsiI_rReg_rReg, blsmskI_rReg_rReg) rather than ialu_reg_mem.
  ins_pipe(ialu_reg);
%}
12979
// Int (x + -1) & x where x is LoadI(src), emitted as blsrl with a memory operand.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory source operand: use the same pipe class as the other reg/mem
  // BMI1 rules (blsiI_rReg_mem, blsmskI_rReg_mem) rather than ialu_reg.
  ins_pipe(ialu_reg_mem);
%}
12996
12997 // Or Instructions
// Int OR, register with register, two-operand form (dst is also the left input).
// Selected only when UseAPX is off.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13012
// Int OR, register with register, three-operand New Data Destination (NDD) form.
// Selected only when UseAPX is on; both inputs are flagged as NDD-demotable.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13027
// Int OR, register with immediate, two-operand form (dst is also the left input).
// Selected only when UseAPX is off.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13042
// Int OR, register with immediate, three-operand NDD form (register is the left input).
// Selected only when UseAPX is on.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13056
// Int OR, immediate with register, three-operand NDD form (immediate is the left input).
// The encoding passes the register (src2) first and the constant (src1) last.
// Selected only when UseAPX is on.
// NOTE(review): operand 1 of this rule is the immediate (src1) while the register
// input is src2; confirm that Flag_ndd_demotable_opr1 (not ..._opr2) is intended.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13070
// Int OR of a register with a value loaded from memory (LoadI folded into orl).
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13085
// Byte read-modify-write: LoadB / OrI with a register / StoreB on the same
// address, collapsed into a single orb with a memory destination.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13100
// Int read-modify-write: LoadI / OrI with a register / StoreI on the same
// address, collapsed into a single orl with a memory destination.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13114
// Int read-modify-write with an immediate: LoadI / OrI imm / StoreI on the
// same address, collapsed into a single orl with a memory destination.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13129
13130 // Xor Instructions
// Int XOR, register with register, two-operand form (dst is also the left input).
// Selected only when UseAPX is off.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13145
// Int XOR, register with register, three-operand New Data Destination (NDD) form.
// Selected only when UseAPX is on; both inputs are flagged as NDD-demotable.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13160
// Int XOR with the constant -1, emitted as an in-place notl.
// Selected only when UseAPX is off; no flags-register operand is declared.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13173
// Int XOR with the constant -1, NDD form: enotl with separate destination and source.
// Selected only when UseAPX is on; the source is flagged NDD-demotable.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm) %{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13186
// Int XOR, register with immediate, two-operand form (dst is also the left input).
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  // The constant -1 is excluded explicitly so that choosing xorI_rReg_im1 for
  // that case does not depend on instruction costs. Requires UseAPX off.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13202
// Int XOR, register with immediate, three-operand NDD form.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  // The constant -1 is excluded explicitly so that choosing xorI_rReg_im1_ndd
  // for that case does not depend on instruction costs. Requires UseAPX on.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13217
// Int XOR of a register with a value loaded from memory (LoadI folded into xorl).
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13232
// Byte read-modify-write: LoadB / XorI with a register / StoreB on the same
// address, collapsed into a single xorb with a memory destination.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13247
// Int read-modify-write: LoadI / XorI with a register / StoreI on the same
// address, collapsed into a single xorl with a memory destination.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13261
// Int read-modify-write with an immediate: LoadI / XorI imm / StoreI on the
// same address, collapsed into a single xorl with a memory destination.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13276
13277
13278 // Long Logical Instructions
13279
13280 // And Instructions
// Long AND, register with register, two-operand form (dst is also the left input).
// Selected only when UseAPX is off.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13295
// Long AND, register with register, three-operand New Data Destination (NDD) form.
// Selected only when UseAPX is on; both inputs are flagged as NDD-demotable.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13311
// Long AND with the constant 255, implemented as a zero-extending byte move.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // The 32-bit movzbl already clears the upper 32 bits, so REX.W is not needed.
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13324
// Long AND with the constant 65535, implemented as a zero-extending word move.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // The 32-bit movzwl already clears the upper 32 bits, so REX.W is not needed.
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13337
// Long AND, register with an immL32 immediate, two-operand form.
// Selected only when UseAPX is off.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13352
// Long AND, register with an immL32 immediate, three-operand NDD form.
// Selected only when UseAPX is on; the register input (operand 1) is flagged NDD-demotable.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13366
// Long AND of a register with a value loaded from memory (LoadL folded into andq).
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13381
// Long read-modify-write: LoadL / AndL with a register / StoreL on the same
// address, collapsed into a single andq with a memory destination.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13396
// Long read-modify-write with an immL32 immediate: LoadL / AndL imm / StoreL
// on the same address, collapsed into a single andq with a memory destination.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13411
// Long read-modify-write AND with a constant whose complement is a power of two
// (immL_NotPow2): emitted as btrq on the memory operand, passing log2(~con) as
// the bit index.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  // Only take constants that genuinely need 64 bits (log2 of ~con above 30);
  // plain AND/OR handles 8/32-bit values well enough.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
13428
13429 // BMI1 instructions
// Long and-not with a memory operand: (src1 ^ -1) & LoadL(src2) emitted as andnq.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andnq $dst, $src1, $src2" %}
  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13444
// Long and-not, all-register form: (src1 ^ -1) & src2 emitted as andnq.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // Register-only operands: use the same pipe class as the int twin
  // andnI_rReg_rReg_rReg rather than the reg/mem class.
  ins_pipe(ialu_reg);
%}
13458
// Long (0 - src) & src, register form, emitted as blsiq.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13472
// Long (0 - x) & x where x is LoadL(src), emitted as blsiq with a memory operand.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13487
// Long (x + -1) ^ x where x is LoadL(src), emitted as blsmskq with a memory operand.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13503
// Long (src + -1) ^ src, register form, emitted as blsmskq.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13519
// Long (src + -1) & src, register form, emitted as blsrq.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq $dst, $src" %}
  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13535
// Long (x + -1) & x where x is LoadL(src), emitted as blsrq with a memory operand.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // Memory source operand: use the same pipe class as the other reg/mem
  // BMI1 rules (blsiL_rReg_mem, blsmskL_rReg_mem) rather than ialu_reg.
  ins_pipe(ialu_reg_mem);
%}
13552
13553 // Or Instructions
// Long OR, register with register, two-operand form (dst is also the left input).
// Selected only when UseAPX is off.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13568
// Long OR, register with register, three-operand New Data Destination (NDD) form.
// Selected only when UseAPX is on; both inputs are flagged as NDD-demotable.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13584
// Long OR of a register with a pointer reinterpreted as a long (CastP2X).
// The pointer operand is any_RegP so that R15 (the TLS register) can be
// matched directly without spilling. Selected only when UseAPX is off.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13598
// NDD form of OR-long-with-CastP2X, selected when UseAPX is on.
//
// src1 is the first input of OrL and therefore a long value, so it is
// declared rRegL (the role dst plays in the destructive orL_rReg_castP2X rule).
// Only the CastP2X input (src2) is a pointer; it stays any_RegP.
// NOTE(review): src1 was previously declared any_RegP, which does not fit the
// long input of OrL; confirm with a build and test run that the rule is now
// selected as intended.
instruct orL_rReg_castP2X_ndd(rRegL dst, rRegL src1, any_RegP src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rlong = $src1$$Register;
    Register Rptr  = $src2$$Register;
    __ eorq(Rdst, Rlong, Rptr, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13611
// 64-bit OR of a register with an immL32 constant, destructive form,
// UseAPX off.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13626
// NDD form of OR-long with an immL32 constant as the second input,
// selected when UseAPX is on.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ eorq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13640
// NDD form of OR-long with the immL32 constant as the FIRST input and the
// register as the second. The operands are swapped when emitting, so the
// register is passed as the source and the constant as the immediate.
// NOTE(review): operand 1 of this rule is the immediate; confirm that
// Flag_ndd_demotable_opr1 is the intended demotion hint here.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src2$$Register;
    __ eorq(Rdst, Rsrc, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13654
// 64-bit OR of a register with a long loaded from memory; the load is
// folded into the orq. No UseAPX predicate on this rule.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13669
// Read-modify-write OR: load a long from dst, OR it with a register and
// store it back to the same address, as one orq with a memory destination.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13684
// Read-modify-write OR of a long in memory with an immL32 constant.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address,
           $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13699
// Read-modify-write OR of a long in memory with a power-of-two constant,
// emitted as a bit-test-and-set on the corresponding bit index.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr) %{
  // Only taken when the set bit is above bit 31, i.e. con is a pure 64-bit
  // power of 2; AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq $dst, log2($con)\t# long" %}
  ins_encode %{
    // Index of the single set bit in the constant.
    int bit_index = log2i_exact((julong)$con$$constant);
    __ btsq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13716
// Xor Instructions
// 64-bit XOR, register with register, destructive form: dst is both the
// first input and the result. Only selected when UseAPX is off.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13732
// 64-bit XOR, register with register, New Data Destination (NDD) form,
// selected when UseAPX is on.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ exorq(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13747
// XOR of a long register with the constant -1, emitted as notq.
// Destructive form, UseAPX off. This rule declares no flags operand.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  predicate(!UseAPX);
  match(Set dst (XorL dst imm));

  format %{ "notq $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ notq(Rdst);
  %}
  ins_pipe(ialu_reg);
%}
13760
// NDD form of XOR-with-minus-one, emitted as enotq with a separate
// destination. Selected when UseAPX is on.
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm) %{
  predicate(UseAPX);
  match(Set dst (XorL src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ enotq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13773
// 64-bit XOR of a register with an immL32 constant, destructive form,
// UseAPX off.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  // The constant -1 is excluded explicitly so that xorL_rReg_im1 is chosen
  // for it regardless of the relative costs of the two rules.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13789
// NDD form of XOR-long with an immL32 constant as the second input,
// selected when UseAPX is on.
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  // The constant -1 is excluded explicitly so that xorL_rReg_im1_ndd is
  // chosen for it regardless of the relative costs of the two rules.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ exorq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13804
// 64-bit XOR of a register with a long loaded from memory; the load is
// folded into the xorq.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13819
// Read-modify-write XOR: load a long from dst, XOR it with a register and
// store it back to the same address, as one xorq with a memory destination.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13834
// Read-modify-write XOR of a long in memory with an immL32 constant.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address,
            $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13849
// CmpLTMask of two int registers: compare p with q, materialize the
// "less" condition into dst with setcc, then negate dst.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Each emitted instruction goes on its own line of the listing; the setcc
  // line previously lacked its line break and ran into "negl".
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, Rdst);
    __ negl(Rdst);
  %}
  ins_pipe(pipe_slow);
%}
13866
// CmpLTMask against the constant zero, done in place with an arithmetic
// right shift of dst by 31.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ sarl(Rdst, 31);
  %}
  ins_pipe(ialu_reg);
%}
13879
// Conditional add: p = (p - q) + ((p < q) ? y : 0), implemented with a
// subtract, a short forward branch, and an add.
/* Better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
            "jge done\n\t"
            "addl $p,$y\n"
            "done: " %}
  ins_encode %{
    Register p_reg = $p$$Register;
    Register q_reg = $q$$Register;
    Register y_reg = $y$$Register;
    Label skip_add;
    __ subl(p_reg, q_reg);
    // The add is skipped when the subtract reports greater-or-equal.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl(p_reg, y_reg);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
13902
// Masked select: y is kept when p < q and cleared otherwise, implemented
// with a compare, a short forward branch, and an xor of y with itself.
/* Better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jlt done\n\t"
            "xorl $y, $y\n"
            "done: " %}
  ins_encode %{
    Register p_reg = $p$$Register;
    Register q_reg = $q$$Register;
    Register y_reg = $y$$Register;
    Label keep_y;
    __ cmpl(p_reg, q_reg);
    // y is left untouched when the compare reports less.
    __ jccb(Assembler::less, keep_y);
    __ xorl(y_reg, y_reg);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
13927
13928
13929 //---------- FP Instructions------------------------------------------------
13930
// Float compare into an rFlagsRegU: ucomiss followed by the sequence
// emitted by emit_cmpfp_fixup.
// Really expensive, avoid
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    XMMRegister Xrhs = $src2$$XMMRegister;
    __ ucomiss(Xlhs, Xrhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
13949
// Float compare, register with register, into an rFlagsRegUCF:
// a bare ucomiss with no follow-up sequence.
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    XMMRegister Xrhs = $src2$$XMMRegister;
    __ ucomiss(Xlhs, Xrhs);
  %}
  ins_pipe(pipe_slow);
%}
13960
// Float compare, register with register, into an rFlagsRegUCFE,
// emitted as evucomxss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    XMMRegister Xrhs = $src2$$XMMRegister;
    __ evucomxss(Xlhs, Xrhs);
  %}
  ins_pipe(pipe_slow);
%}
13971
// Float compare of a register with a float loaded from memory, into an
// rFlagsRegUCF; the load is folded into ucomiss.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    __ ucomiss(Xlhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13982
// Float compare of a register with a float loaded from memory, into an
// rFlagsRegUCFE; the load is folded into evucomxss.
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    __ evucomxss(Xlhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13993
// Float compare of a register with a float constant, into an rFlagsRegUCF;
// the constant is referenced through its constant-table address.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister Xsrc = $src$$XMMRegister;
    __ ucomiss(Xsrc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14004
// Float compare of a register with a float constant, into an rFlagsRegUCFE;
// the constant is referenced through its constant-table address.
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evucomxss(Xsrc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14015
// Double compare into an rFlagsRegU: ucomisd followed by the sequence
// emitted by emit_cmpfp_fixup.
// Really expensive, avoid
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    XMMRegister Xrhs = $src2$$XMMRegister;
    __ ucomisd(Xlhs, Xrhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14034
// Double compare, register with register, into an rFlagsRegUCF:
// a bare ucomisd with no follow-up sequence.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The listing shows exactly the emitted instruction; a stray " test"
  // suffix was removed so it reads like the float rule cmpF_cc_regCF.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    XMMRegister Xrhs = $src2$$XMMRegister;
    __ ucomisd(Xlhs, Xrhs);
  %}
  ins_pipe(pipe_slow);
%}
14045
// Double compare, register with register, into an rFlagsRegUCFE,
// emitted as evucomxsd.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The listing shows exactly the emitted instruction; a stray " test"
  // suffix was removed so it reads like the float rule cmpF_cc_regCFE.
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    XMMRegister Xrhs = $src2$$XMMRegister;
    __ evucomxsd(Xlhs, Xrhs);
  %}
  ins_pipe(pipe_slow);
%}
14056
// Double compare of a register with a double loaded from memory, into an
// rFlagsRegUCF; the load is folded into ucomisd.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    __ ucomisd(Xlhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14067
// Double compare of a register with a double loaded from memory, into an
// rFlagsRegUCFE; the load is folded into evucomxsd.
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    XMMRegister Xlhs = $src1$$XMMRegister;
    __ evucomxsd(Xlhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14078
// Double compare of a register with a double constant, into an rFlagsRegUCF;
// the constant is referenced through its constant-table address.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister Xsrc = $src$$XMMRegister;
    __ ucomisd(Xsrc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14088
// Double compare of a register with a double constant, into an
// rFlagsRegUCFE; the constant is referenced through its constant-table address.
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evucomxsd(Xsrc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14099
// Three-way float compare (CmpF3) of two registers, result in an int
// register: compare into -1,0,1. ucomiss sets the flags and emit_cmpfp3
// turns them into the integer result.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register Rres = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, Rres);
  %}
  ins_pipe(pipe_slow);
%}
14120
// Three-way float compare (CmpF3) of a register with a float loaded from
// memory: compare into -1,0,1.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register Rres = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, Rres);
  %}
  ins_pipe(pipe_slow);
%}
14141
// Three-way float compare (CmpF3) of a register with a float constant taken
// from the constant table: compare into -1,0,1.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register Rres = $dst$$Register;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, Rres);
  %}
  ins_pipe(pipe_slow);
%}
14161
// Three-way double compare (CmpD3) of two registers, result in an int
// register: compare into -1,0,1. ucomisd sets the flags and emit_cmpfp3
// turns them into the integer result.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register Rres = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, Rres);
  %}
  ins_pipe(pipe_slow);
%}
14182
// Three-way double compare (CmpD3) of a register with a double loaded from
// memory: compare into -1,0,1.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register Rres = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, Rres);
  %}
  ins_pipe(pipe_slow);
%}
14203
// Three-way double compare (CmpD3) of a register with a double constant
// taken from the constant table: compare into -1,0,1.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register Rres = $dst$$Register;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, Rres);
  %}
  ins_pipe(pipe_slow);
%}
14223
14224 //----------Arithmetic Conversion Instructions---------------------------------
14225
// Float to double conversion, register to register.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ cvtss2sd(Xdst, Xsrc);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14236
// Float to double conversion with the float loaded from memory.
// Only selected when UseAVX == 0.
instruct convF2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtss2sd(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14248
// Double to float conversion, register to register.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ cvtsd2ss(Xdst, Xsrc);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14259
// Double to float conversion with the double loaded from memory.
// Only selected when UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsd2ss(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14271
// Float to int conversion through the convertF2I macro-assembler routine.
// Used when AVX10.2 is not supported; declares a flags kill.
// XXX do mem variants
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14284
// Float to int conversion as a single evcvttss2sisl, selected when AVX10.2
// is supported. No flags operand is declared on this rule.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evcvttss2sisl(Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14295
// Float to int conversion with the float loaded from memory, as a single
// evcvttss2sisl. Selected when AVX10.2 is supported.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ evcvttss2sisl(Rdst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14306
// Float to long conversion through the convertF2I macro-assembler routine.
// Used when AVX10.2 is not supported; declares a flags kill.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14318
// Float to long conversion as a single evcvttss2sisq, selected when AVX10.2
// is supported. No flags operand is declared on this rule.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evcvttss2sisq(Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14329
// Float to long conversion with the float loaded from memory, as a single
// evcvttss2sisq. Selected when AVX10.2 is supported.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ evcvttss2sisq(Rdst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14340
// Double to int conversion through the convertF2I macro-assembler routine.
// Used when AVX10.2 is not supported; declares a flags kill.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14352
// Double to int conversion as a single evcvttsd2sisl, selected when AVX10.2
// is supported. No flags operand is declared on this rule.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evcvttsd2sisl(Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14363
// Double to int conversion with the double loaded from memory, as a single
// evcvttsd2sisl. Selected when AVX10.2 is supported.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ evcvttsd2sisl(Rdst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14374
// Double to long conversion through the convertF2I macro-assembler routine.
// Used when AVX10.2 is not supported; declares a flags kill.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14386
// Double to long conversion as a single evcvttsd2sisq, selected when AVX10.2
// is supported. No flags operand is declared on this rule.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evcvttsd2sisq(Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14397
// Double to long conversion with the double loaded from memory, as a single
// evcvttsd2sisq. Selected when AVX10.2 is supported.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ evcvttsd2sisq(Rdst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14408
// RoundD: double to long through the round_double macro-assembler routine.
// dst, rtmp and rcx are all declared TEMP, and the flags are killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ round_double(Rdst, Xsrc, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14419
// RoundF: float to int through the round_float macro-assembler routine.
// dst, rtmp and rcx are all declared TEMP, and the flags are killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ round_float(Rdst, Xsrc, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14430
// Int to float conversion from a general register, selected when
// UseXmmI2F is off.
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed first; presumably this
    // avoids a dependency on its previous contents (confirm).
    if (UseAVX > 0) {
      __ pxor(Xdst, Xdst);
    }
    __ cvtsi2ssl(Xdst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14445
// Int to float conversion with the int loaded from memory.
// Only selected when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2ssl(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14457
// Int to double conversion from a general register, selected when
// UseXmmI2D is off.
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed first; presumably this
    // avoids a dependency on its previous contents (confirm).
    if (UseAVX > 0) {
      __ pxor(Xdst, Xdst);
    }
    __ cvtsi2sdl(Xdst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14472
// Int to double conversion with the int loaded from memory.
// Only selected when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2sdl(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14484
// Int to float conversion done in the XMM unit, selected when UseXmmI2F is
// on: move the int into dst with movdl, then convert in place with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  // The second line names the instruction that is actually emitted
  // (cvtdq2ps); the listing previously printed "cvtdq2psl".
  format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movdl(Xdst, $src$$Register);
    __ cvtdq2ps(Xdst, Xdst);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14498
// Int to double conversion done in the XMM unit, selected when UseXmmI2D is
// on: move the int into dst with movdl, then convert in place with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  // The second line names the instruction that is actually emitted
  // (cvtdq2pd); the listing previously printed "cvtdq2pdl".
  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movdl(Xdst, $src$$Register);
    __ cvtdq2pd(Xdst, Xdst);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14512
// Long to float conversion from a general register.
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed first; presumably this
    // avoids a dependency on its previous contents (confirm).
    if (UseAVX > 0) {
      __ pxor(Xdst, Xdst);
    }
    __ cvtsi2ssq(Xdst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14526
// Long to float conversion with the long loaded from memory.
// Only selected when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2ssq(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14538
// Long to double conversion from a general register.
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed first; presumably this
    // avoids a dependency on its previous contents (confirm).
    if (UseAVX > 0) {
      __ pxor(Xdst, Xdst);
    }
    __ cvtsi2sdq(Xdst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14552
// Long to double conversion with the long loaded from memory.
// Only selected when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2sdq(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14564
// Int to long conversion (ConvI2L), register to register, emitted as movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq $dst, $src\t# i2l" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movslq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
14576
// Zero-extend convert int to long: matches ConvI2L followed by an AndL with
// the immL_32bits mask and emits a 32-bit register move instead.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L src) mask));

  // Single-line listing; the dangling line break after the comment was dropped.
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // No instruction is emitted when source and destination were assigned
    // the same register.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
14590
// Zero-extend convert int to long with the int loaded from memory: matches
// ConvI2L of a LoadI followed by an AndL with the immL_32bits mask and
// emits a 32-bit load.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // Single-line listing; the dangling line break after the comment was dropped.
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14602
// AndL of a long register with the immL_32bits mask, emitted as a 32-bit
// register move.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
14613
// Long to int conversion (ConvL2I), emitted as a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl $dst, $src\t# l2i" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
14624
14625
// MoveF2I from a float stack slot into an int register: a 32-bit load from
// rsp plus the slot displacement.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}
14637
// MoveI2F from an int stack slot into a float register: a movflt load from
// rsp plus the slot displacement.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movflt(Xdst, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}
14649
// MoveD2L from a double stack slot into a long register: a 64-bit load from
// rsp plus the slot displacement.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movq(Rdst, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}
14661
// MoveL2D with the long in a stack slot, variant selected when
// UseXmmLoadAndClearUpper is off. The load goes through movdbl; the format
// text prints it as movlpd.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    // The stack slot is addressed relative to rsp.
    const Address src_slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, src_slot);
  %}
  ins_pipe(pipe_slow);
%}
14674
// MoveL2D with the long in a stack slot, variant selected when
// UseXmmLoadAndClearUpper is on. The load goes through movdbl; the format
// text prints it as movsd.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    // The stack slot is addressed relative to rsp.
    const Address src_slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, src_slot);
  %}
  ins_pipe(pipe_slow);
%}
14687
14688
// MoveF2I with the result in a stack slot: the float held in an XMM register
// is stored to [rsp + disp] via movflt.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    // The destination slot is addressed relative to rsp.
    const Address dst_slot(rsp, $dst$$disp);
    __ movflt(dst_slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14700
// MoveI2F with the result in a stack slot: the int held in a general-purpose
// register is stored as 32 bits to [rsp + disp].
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    // The destination slot is addressed relative to rsp.
    const Address dst_slot(rsp, $dst$$disp);
    __ movl(dst_slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14712
// MoveD2L with the result in a stack slot: the double held in an XMM register
// is stored to [rsp + disp] via movdbl.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The trailing tag names this rule (it previously read "MoveL2D_reg_stack",
  // which is the name of the opposite-direction rule).
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14724
// MoveL2D with the result in a stack slot: the long held in a general-purpose
// register is stored as 64 bits to [rsp + disp].
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    // The destination slot is addressed relative to rsp.
    const Address dst_slot(rsp, $dst$$disp);
    __ movq(dst_slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14736
// MoveF2I, register to register: XMM -> general-purpose register via movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveF2I" %}
  ins_encode %{
    const Register    gpr_dst = $dst$$Register;
    const XMMRegister xmm_src = $src$$XMMRegister;
    __ movdl(gpr_dst, xmm_src);
  %}
  ins_pipe(pipe_slow);
%}
14747
// MoveD2L, register to register: XMM -> general-purpose register via movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveD2L" %}
  ins_encode %{
    const Register    gpr_dst = $dst$$Register;
    const XMMRegister xmm_src = $src$$XMMRegister;
    __ movdq(gpr_dst, xmm_src);
  %}
  ins_pipe(pipe_slow);
%}
14758
// MoveI2F, register to register: general-purpose -> XMM register via movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveI2F" %}
  ins_encode %{
    const XMMRegister xmm_dst = $dst$$XMMRegister;
    const Register    gpr_src = $src$$Register;
    __ movdl(xmm_dst, gpr_src);
  %}
  ins_pipe(pipe_slow);
%}
14769
// MoveL2D, register to register: general-purpose -> XMM register via movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    const XMMRegister xmm_dst = $dst$$XMMRegister;
    const Register    gpr_src = $src$$Register;
    __ movdq(xmm_dst, gpr_src);
  %}
  ins_pipe(pipe_slow);
%}
14780
14781
14782 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// ClearArray with a register count. Selected when the node is neither
// is_large() nor word_copy_only() and UseAVX <= 2.
//   cnt  - count, pinned to rcx by its operand class; USE_KILLed
//   base - start address, pinned to rdi; USE_KILLed
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - XMM temporary
// Flags are killed. All code generation is delegated to clear_mem().
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // The format below is a hand-written sketch used only for printing; it is
  // not derived from the encoding.
  // NOTE(review): the UseXMMForObjInit sketch addresses memory through rax
  // and stores xmm0 in the short loop, although base is rdi and the vector
  // temp is $tmp -- looks stale; confirm against clear_mem().
  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two trailing bools presumably mirror is_large / word_copy_only
    // from the predicate (false, false here) -- verify against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
14842
// ClearArray with a register count, word-copy-only variant. Selected when
// the node is not is_large(), is word_copy_only(), and UseAVX <= 2.
//   cnt  - count, pinned to rcx by its operand class; USE_KILLed
//   base - start address, pinned to rdi; USE_KILLed
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - XMM temporary
// Flags are killed. All code generation is delegated to clear_mem().
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Hand-written sketch used only for printing. Unlike rep_stos, this sketch
  // has no UseFastStosb (rep stosb) branch.
  // NOTE(review): the UseXMMForObjInit sketch addresses memory through rax
  // and stores xmm0 in the short loop, although base is rdi and the vector
  // temp is $tmp -- looks stale; confirm against clear_mem().
  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two trailing bools presumably mirror is_large / word_copy_only
    // from the predicate (false, true here) -- verify against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
14898
// Small non-constant length ClearArray for AVX512 targets.
// Selected when the node is neither is_large() nor word_copy_only() and
// UseAVX > 2.
//   cnt  - count, pinned to rcx by its operand class; USE_KILLed
//   base - start address, pinned to rdi; USE_KILLed
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - vector temporary (legRegD)
//   ktmp - opmask temporary, forwarded to clear_mem()
// Flags are killed. All code generation is delegated to clear_mem().
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Hand-written sketch used only for printing; not derived from the
  // encoding.
  // NOTE(review): the sketch opens with "xorq rax, rax" although rax carries
  // the $val input that is passed to clear_mem(), and its XMM path prints
  // ymm0/xmm0 and rax-based addresses rather than $tmp / rdi -- looks stale;
  // confirm against clear_mem().
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two trailing bools presumably mirror is_large / word_copy_only
    // from the predicate (false, false here) -- verify against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
14959
// ClearArray with a register count, word-copy-only variant for UseAVX > 2.
// Selected when the node is not is_large() and is word_copy_only().
//   cnt  - count, pinned to rcx by its operand class; USE_KILLed
//   base - start address, pinned to rdi; USE_KILLed
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - vector temporary (legRegD)
//   ktmp - opmask temporary, forwarded to clear_mem()
// Flags are killed. All code generation is delegated to clear_mem().
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Hand-written sketch used only for printing; not derived from the
  // encoding.
  // NOTE(review): the sketch opens with "xorq rax, rax" although rax carries
  // the $val input, and it still prints a UseFastStosb (rep stosb) branch
  // that the non-AVX512 rep_stos_word_copy sketch omits -- looks stale;
  // confirm against clear_mem().
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two trailing bools presumably mirror is_large / word_copy_only
    // from the predicate (false, true here) -- verify against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15019
// Large non-constant length ClearArray for non-AVX512 targets.
// Selected when the node is is_large(), not word_copy_only(), and
// UseAVX <= 2.
//   cnt  - count, pinned to rcx by its operand class; USE_KILLed
//   base - start address, pinned to rdi; USE_KILLed
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - XMM temporary
// Flags are killed. All code generation is delegated to clear_mem().
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Hand-written sketch used only for printing. Compared with rep_stos it has
  // no InitArrayShortSize short-loop prologue.
  // NOTE(review): the UseXMMForObjInit sketch addresses memory through rax
  // and stores xmm0 in the short loop, although base is rdi and the vector
  // temp is $tmp -- looks stale; confirm against clear_mem().
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The two trailing bools presumably mirror is_large / word_copy_only
    // from the predicate (true, false here) -- verify against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15069
// Large ClearArray with a register count, word-copy-only variant.
// Selected when the node is is_large() and word_copy_only(), and
// UseAVX <= 2.
//   cnt  - count, pinned to rcx by its operand class; USE_KILLed
//   base - start address, pinned to rdi; USE_KILLed
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - XMM temporary
// Flags are killed. All code generation is delegated to clear_mem().
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Hand-written sketch used only for printing. Unlike rep_stos_large, this
  // sketch has no UseFastStosb (rep stosb) branch.
  // NOTE(review): the UseXMMForObjInit sketch addresses memory through rax
  // and stores xmm0 in the short loop, although base is rdi and the vector
  // temp is $tmp -- looks stale; confirm against clear_mem().
  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The two trailing bools presumably mirror is_large / word_copy_only
    // from the predicate (true, true here) -- verify against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15115
// Large non-constant length ClearArray for AVX512 targets.
// Selected when the node is is_large(), not word_copy_only(), and
// UseAVX > 2.
//   cnt  - count, pinned to rcx by its operand class; USE_KILLed
//   base - start address, pinned to rdi; USE_KILLed
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - vector temporary (legRegD)
//   ktmp - opmask temporary, forwarded to clear_mem()
// Flags are killed. All code generation is delegated to clear_mem().
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Hand-written sketch used only for printing; not derived from the
  // encoding.
  // NOTE(review): the stosb/stosq branches print "xorq rax, rax" although rax
  // carries the $val input that is passed to clear_mem(), and the XMM path
  // prints ymm0/xmm0 and rax-based addresses rather than $tmp / rdi -- looks
  // stale; confirm against clear_mem().
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The two trailing bools presumably mirror is_large / word_copy_only
    // from the predicate (true, false here) -- verify against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15166
// Large ClearArray with a register count, word-copy-only variant for
// UseAVX > 2. Selected when the node is is_large() and word_copy_only().
//   cnt  - count, pinned to rcx by its operand class; USE_KILLed
//   base - start address, pinned to rdi; USE_KILLed
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - vector temporary (legRegD)
//   ktmp - opmask temporary, forwarded to clear_mem()
// Flags are killed. All code generation is delegated to clear_mem().
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Hand-written sketch used only for printing; not derived from the
  // encoding.
  // NOTE(review): the sketch prints "xorq rax, rax" although rax carries the
  // $val input, and it still prints a UseFastStosb (rep stosb) branch that
  // the non-AVX512 rep_stos_large_word_copy sketch omits -- looks stale;
  // confirm against clear_mem().
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The two trailing bools presumably mirror is_large / word_copy_only
    // from the predicate (true, true here) -- verify against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15216
// Small constant length ClearArray for AVX512 targets.
// Selected when the node is neither is_large() nor word_copy_only(),
// MaxVectorSize >= 32 and VM_Version::supports_avx512vl() holds.
//   cnt  - compile-time constant count (immL), passed to clear_mem() as an
//          immediate
//   base - start address in any pointer register
//   val  - fill value, pinned to rax; USE_KILLed
//   tmp  - XMM temporary, ktmp - opmask temporary
// Flags are killed.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  // Single-line format: no trailing "\n\t" continuation.
  format %{ "clear_mem_imm $base , $cnt" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15231
// StrComp for the LL encoding, used when VM_Version::supports_avx512vlbw()
// is false. Both strings and both counts are consumed (USE_KILL), tmp1 is an
// XMM temporary, flags are killed; no opmask register is used (knoreg).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first      = $str1$$Register;
    const Register    second     = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    __ string_compare(first, second, first_len, second_len, res,
                      vec_tmp, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15247
// StrComp for the LL encoding, used when VM_Version::supports_avx512vlbw()
// is true. Same operand contract as string_compareL plus an opmask temporary
// (ktmp) that is handed to string_compare().
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first      = $str1$$Register;
    const Register    second     = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, res,
                      vec_tmp, StrIntrinsicNode::LL, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15263
// StrComp for the UU encoding, used when VM_Version::supports_avx512vlbw()
// is false. Both strings and both counts are consumed (USE_KILL), tmp1 is an
// XMM temporary, flags are killed; no opmask register is used (knoreg).
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first      = $str1$$Register;
    const Register    second     = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    __ string_compare(first, second, first_len, second_len, res,
                      vec_tmp, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15279
// StrComp for the UU encoding, used when VM_Version::supports_avx512vlbw()
// is true. Same operand contract as string_compareU plus an opmask temporary
// (ktmp) that is handed to string_compare().
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first      = $str1$$Register;
    const Register    second     = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, res,
                      vec_tmp, StrIntrinsicNode::UU, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15295
// StrComp for the LU encoding, used when VM_Version::supports_avx512vlbw()
// is false. Both strings and both counts are consumed (USE_KILL), tmp1 is an
// XMM temporary, flags are killed; no opmask register is used (knoreg).
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first      = $str1$$Register;
    const Register    second     = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    __ string_compare(first, second, first_len, second_len, res,
                      vec_tmp, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15311
// StrComp for the LU encoding, used when VM_Version::supports_avx512vlbw()
// is true. Same operand contract as string_compareLU plus an opmask temporary
// (ktmp) that is handed to string_compare().
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first      = $str1$$Register;
    const Register    second     = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, res,
                      vec_tmp, StrIntrinsicNode::LU, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15327
// StrComp for the UL encoding, used when VM_Version::supports_avx512vlbw()
// is false. Here str1/cnt1 are pinned to rsi/rdx and str2/cnt2 to rdi/rcx,
// and string_compare() receives the second string's registers first.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first      = $str1$$Register;
    const Register    second     = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // Argument order is deliberately (str2, str1, cnt2, cnt1).
    __ string_compare(second, first, second_len, first_len, res,
                      vec_tmp, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15343
// StrComp for the UL encoding, used when VM_Version::supports_avx512vlbw()
// is true. Here str1/cnt1 are pinned to rsi/rdx and str2/cnt2 to rdi/rcx,
// string_compare() receives the second string's registers first, and an
// opmask temporary (ktmp) is passed along.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first      = $str1$$Register;
    const Register    second     = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    // Argument order is deliberately (str2, str1, cnt2, cnt1).
    __ string_compare(second, first, second_len, first_len, res,
                      vec_tmp, StrIntrinsicNode::UL, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15359
// StrIndexOf with a compile-time constant substring length, LL encoding.
// Requires UseSSE42Intrinsics. rax (cnt2) and rcx (tmp) are not inputs here;
// they are only clobbered.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = static_cast<int>($int_cnt2$$constant);
    if (needle_len < 16) {
      // Short substrings: loaded through the stack if they cross a page
      // boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Constant substrings of 16 or more elements do not need to be loaded
      // through the stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe(pipe_slow);
%}
15388
// StrIndexOf with a compile-time constant substring length, UU encoding.
// Requires UseSSE42Intrinsics. rax (cnt2) and rcx (tmp) are not inputs here;
// they are only clobbered.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = static_cast<int>($int_cnt2$$constant);
    if (needle_len < 8) {
      // Short substrings: loaded through the stack if they cross a page
      // boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Constant substrings of 8 or more elements do not need to be loaded
      // through the stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe(pipe_slow);
%}
15417
// StrIndexOf with a compile-time constant substring length, UL encoding.
// Requires UseSSE42Intrinsics. rax (cnt2) and rcx (tmp) are not inputs here;
// they are only clobbered.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = static_cast<int>($int_cnt2$$constant);
    if (needle_len < 8) {
      // Short substrings: loaded through the stack if they cross a page
      // boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Constant substrings of 8 or more elements do not need to be loaded
      // through the stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe(pipe_slow);
%}
15446
// StrIndexOf with the substring length in a register, LL encoding.
// Requires UseSSE42Intrinsics. All four inputs are consumed (USE_KILL);
// rcx (tmp) and the flags are clobbered.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    // -1 is passed where the constant-length rules pass the substring length.
    const int no_const_len = -1;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      no_const_len, $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_slow);
%}
15463
// StrIndexOf for the UU encoding, with the pattern length supplied in a register (cnt2).
// Selected only when UseSSE42Intrinsics is set. The count-constant argument of
// string_indexof is passed as -1.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    const Register    s1   = $str1$$Register;
    const Register    s2   = $str2$$Register;
    const Register    n1   = $cnt1$$Register;
    const Register    n2   = $cnt2$$Register;
    const Register    res  = $result$$Register;
    const XMMRegister vec  = $tmp_vec$$XMMRegister;
    const Register    gpr  = $tmp$$Register;
    __ string_indexof(s1, s2, n1, n2, (-1), res, vec, gpr, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
15480
// StrIndexOf for the UL encoding, with the pattern length supplied in a register (cnt2).
// Selected only when UseSSE42Intrinsics is set. The count-constant argument of
// string_indexof is passed as -1.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    const Register    s1   = $str1$$Register;
    const Register    s2   = $str2$$Register;
    const Register    n1   = $cnt1$$Register;
    const Register    n2   = $cnt2$$Register;
    const Register    res  = $result$$Register;
    const XMMRegister vec  = $tmp_vec$$XMMRegister;
    const Register    gpr  = $tmp$$Register;
    __ string_indexof(s1, s2, n1, n2, (-1), res, vec, gpr, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
15497
// StrIndexOfChar for the U encoding; selected only when UseSSE42Intrinsics is set.
// Three vector temporaries and one general-purpose temporary are handed to the
// string_indexof_char helper.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);

  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    const Register    s1  = $str1$$Register;
    const Register    n1  = $cnt1$$Register;
    const Register    chr = $ch$$Register;
    const Register    res = $result$$Register;
    const XMMRegister v1  = $tmp_vec1$$XMMRegister;
    const XMMRegister v2  = $tmp_vec2$$XMMRegister;
    const XMMRegister v3  = $tmp_vec3$$XMMRegister;
    const Register    gpr = $tmp$$Register;
    __ string_indexof_char(s1, n1, chr, res, v1, v2, v3, gpr);
  %}
  ins_pipe( pipe_slow );
%}
15511
// StrIndexOfChar for the L encoding; selected only when UseSSE42Intrinsics is set.
// Same operand layout as string_indexof_char, but dispatches to stringL_indexof_char.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);

  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    const Register    s1  = $str1$$Register;
    const Register    n1  = $cnt1$$Register;
    const Register    chr = $ch$$Register;
    const Register    res = $result$$Register;
    const XMMRegister v1  = $tmp_vec1$$XMMRegister;
    const XMMRegister v2  = $tmp_vec2$$XMMRegister;
    const XMMRegister v3  = $tmp_vec3$$XMMRegister;
    const Register    gpr = $tmp$$Register;
    __ stringL_indexof_char(s1, n1, chr, res, v1, v2, v3, gpr);
  %}
  ins_pipe( pipe_slow );
%}
15525
15526 // fast string equals
// StrEquals when AVX512VL+BW is not available: arrays_equals is invoked with its
// first argument false, the char flag false, and no mask register (knoreg).
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    const Register    s1  = $str1$$Register;
    const Register    s2  = $str2$$Register;
    const Register    len = $cnt$$Register;
    const Register    res = $result$$Register;
    const Register    t3  = $tmp3$$Register;
    const XMMRegister v1  = $tmp1$$XMMRegister;
    const XMMRegister v2  = $tmp2$$XMMRegister;
    __ arrays_equals(false, s1, s2, len, res, t3, v1, v2, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15542
// StrEquals when AVX512VL+BW is available: identical to string_equals except that a
// kReg temporary is allocated and forwarded to arrays_equals as the mask register.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    const Register    s1   = $str1$$Register;
    const Register    s2   = $str2$$Register;
    const Register    len  = $cnt$$Register;
    const Register    res  = $result$$Register;
    const Register    t3   = $tmp3$$Register;
    const XMMRegister v1   = $tmp1$$XMMRegister;
    const XMMRegister v2   = $tmp2$$XMMRegister;
    const KRegister   mask = $ktmp$$KRegister;
    __ arrays_equals(false, s1, s2, len, res, t3, v1, v2, false /* char */, mask);
  %}
  ins_pipe( pipe_slow );
%}
15558
15559 // fast array equals
// AryEq for the LL encoding without AVX512VL+BW: arrays_equals is invoked with its
// first argument true, the char flag false, and no mask register (knoreg).
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    a1  = $ary1$$Register;
    const Register    a2  = $ary2$$Register;
    const Register    t3  = $tmp3$$Register;
    const Register    res = $result$$Register;
    const Register    t4  = $tmp4$$Register;
    const XMMRegister v1  = $tmp1$$XMMRegister;
    const XMMRegister v2  = $tmp2$$XMMRegister;
    __ arrays_equals(true, a1, a2, t3, res, t4, v1, v2, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15575
// AryEq for the LL encoding with AVX512VL+BW: same as array_equalsB, plus a kReg
// temporary that is forwarded to arrays_equals as the mask register.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    a1   = $ary1$$Register;
    const Register    a2   = $ary2$$Register;
    const Register    t3   = $tmp3$$Register;
    const Register    res  = $result$$Register;
    const Register    t4   = $tmp4$$Register;
    const XMMRegister v1   = $tmp1$$XMMRegister;
    const XMMRegister v2   = $tmp2$$XMMRegister;
    const KRegister   mask = $ktmp$$KRegister;
    __ arrays_equals(true, a1, a2, t3, res, t4, v1, v2, false /* char */, mask);
  %}
  ins_pipe( pipe_slow );
%}
15591
// AryEq for the UU encoding without AVX512VL+BW: arrays_equals is invoked with its
// first argument true, the char flag true, and no mask register (knoreg).
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    a1  = $ary1$$Register;
    const Register    a2  = $ary2$$Register;
    const Register    t3  = $tmp3$$Register;
    const Register    res = $result$$Register;
    const Register    t4  = $tmp4$$Register;
    const XMMRegister v1  = $tmp1$$XMMRegister;
    const XMMRegister v2  = $tmp2$$XMMRegister;
    __ arrays_equals(true, a1, a2, t3, res, t4, v1, v2, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15607
// AryEq for the UU encoding with AVX512VL+BW: same as array_equalsC, plus a kReg
// temporary that is forwarded to arrays_equals as the mask register.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    a1   = $ary1$$Register;
    const Register    a2   = $ary2$$Register;
    const Register    t3   = $tmp3$$Register;
    const Register    res  = $result$$Register;
    const Register    t4   = $tmp4$$Register;
    const XMMRegister v1   = $tmp1$$XMMRegister;
    const XMMRegister v2   = $tmp2$$XMMRegister;
    const KRegister   mask = $ktmp$$KRegister;
    __ arrays_equals(true, a1, a2, t3, res, t4, v1, v2, true /* char */, mask);
  %}
  ins_pipe( pipe_slow );
%}
15623
// VectorizedHashCode, selected when UseAVX >= 2. The result operand is both an input
// and the output of the match rule. Thirteen vector temporaries and three
// general-purpose temporaries are reserved; the immU8 basic_type constant is cast to
// BasicType and passed as the last argument of arrays_hashcode.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr) %{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    const Register ary = $ary1$$Register;
    const Register cnt = $cnt1$$Register;
    const Register res = $result$$Register;
    const Register g1  = $tmp1$$Register;
    const Register g2  = $tmp2$$Register;
    const Register g3  = $tmp3$$Register;
    __ arrays_hashcode(ary, cnt, res, g1, g2, g3,
                       $tmp_vec1$$XMMRegister,  $tmp_vec2$$XMMRegister,
                       $tmp_vec3$$XMMRegister,  $tmp_vec4$$XMMRegister,
                       $tmp_vec5$$XMMRegister,  $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister,  $tmp_vec8$$XMMRegister,
                       $tmp_vec9$$XMMRegister,  $tmp_vec10$$XMMRegister,
                       $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister,
                       (BasicType)$basic_type$$constant);
  %}
  ins_pipe( pipe_slow );
%}
15649
// CountPositives when AVX512VL+BW or BMI2 is missing: the count_positives helper is
// called with both mask-register arguments set to knoreg.
// The operand list no longer ends with a stray comma before the closing parenthesis.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15665
// CountPositives when both AVX512VL+BW and BMI2 are available: two kReg temporaries
// are allocated and forwarded to the count_positives helper.
// The operand list no longer ends with a stray comma before the closing parenthesis.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15681
15682 // fast char[] to byte[] compression
// StrCompressedCopy when AVX512VL+BW or BMI2 is missing: char_array_compress is called
// with both mask-register arguments set to knoreg.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register    from = $src$$Register;
    const Register    to   = $dst$$Register;
    const Register    cnt  = $len$$Register;
    const XMMRegister v1   = $tmp1$$XMMRegister;
    const XMMRegister v2   = $tmp2$$XMMRegister;
    const XMMRegister v3   = $tmp3$$XMMRegister;
    const XMMRegister v4   = $tmp4$$XMMRegister;
    const Register    t5   = $tmp5$$Register;
    const Register    res  = $result$$Register;
    __ char_array_compress(from, to, cnt, v1, v2, v3, v4, t5, res, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15699
// StrCompressedCopy when both AVX512VL+BW and BMI2 are available: two kReg temporaries
// are allocated and forwarded to char_array_compress.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register    from = $src$$Register;
    const Register    to   = $dst$$Register;
    const Register    cnt  = $len$$Register;
    const XMMRegister v1   = $tmp1$$XMMRegister;
    const XMMRegister v2   = $tmp2$$XMMRegister;
    const XMMRegister v3   = $tmp3$$XMMRegister;
    const XMMRegister v4   = $tmp4$$XMMRegister;
    const Register    t5   = $tmp5$$Register;
    const Register    res  = $result$$Register;
    const KRegister   k1   = $ktmp1$$KRegister;
    const KRegister   k2   = $ktmp2$$KRegister;
    __ char_array_compress(from, to, cnt, v1, v2, v3, v4, t5, res, k1, k2);
  %}
  ins_pipe( pipe_slow );
%}
15716 // fast byte[] to char[] inflation
// StrInflatedCopy when AVX512VL+BW or BMI2 is missing. The rule sets a Universe dummy
// operand; byte_array_inflate is called with knoreg as the mask register.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    const Register    from = $src$$Register;
    const Register    to   = $dst$$Register;
    const Register    cnt  = $len$$Register;
    const XMMRegister vec  = $tmp1$$XMMRegister;
    const Register    gpr  = $tmp2$$Register;
    __ byte_array_inflate(from, to, cnt, vec, gpr, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15730
// StrInflatedCopy when both AVX512VL+BW and BMI2 are available: a kReg temporary is
// allocated and forwarded to byte_array_inflate as the mask register.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    const Register    from = $src$$Register;
    const Register    to   = $dst$$Register;
    const Register    cnt  = $len$$Register;
    const XMMRegister vec  = $tmp1$$XMMRegister;
    const Register    gpr  = $tmp2$$Register;
    const KRegister   mask = $ktmp$$KRegister;
    __ byte_array_inflate(from, to, cnt, vec, gpr, mask);
  %}
  ins_pipe( pipe_slow );
%}
15744
15745 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray for nodes where is_ascii() is false: the encode_iso_array helper is
// called with its final flag set to false.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    const Register    from = $src$$Register;
    const Register    to   = $dst$$Register;
    const Register    cnt  = $len$$Register;
    const XMMRegister v1   = $tmp1$$XMMRegister;
    const XMMRegister v2   = $tmp2$$XMMRegister;
    const XMMRegister v3   = $tmp3$$XMMRegister;
    const XMMRegister v4   = $tmp4$$XMMRegister;
    const Register    t5   = $tmp5$$Register;
    const Register    res  = $result$$Register;
    __ encode_iso_array(from, to, cnt, v1, v2, v3, v4, t5, res, false);
  %}
  ins_pipe( pipe_slow );
%}
15761
15762 // encode char[] to byte[] in ASCII
// EncodeISOArray for nodes where is_ascii() is true: the same encode_iso_array helper
// is called, with its final flag set to true.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    const Register    from = $src$$Register;
    const Register    to   = $dst$$Register;
    const Register    cnt  = $len$$Register;
    const XMMRegister v1   = $tmp1$$XMMRegister;
    const XMMRegister v2   = $tmp2$$XMMRegister;
    const XMMRegister v3   = $tmp3$$XMMRegister;
    const XMMRegister v4   = $tmp4$$XMMRegister;
    const Register    t5   = $tmp5$$Register;
    const Register    res  = $result$$Register;
    __ encode_iso_array(from, to, cnt, v1, v2, v3, v4, t5, res, true);
  %}
  ins_pipe( pipe_slow );
%}
15778
15779 //----------Overflow Math Instructions-----------------------------------------
15780
// OverflowAddI, register + register. The flags come from a real addl, which writes
// op1 (declared USE_KILL).
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register acc   = $op1$$Register;
    const Register other = $op2$$Register;
    __ addl(acc, other);
  %}
  ins_pipe(ialu_reg_reg);
%}
15793
// OverflowAddI, register + int immediate. The flags come from a real addl, which
// writes op1 (declared USE_KILL).
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register acc = $op1$$Register;
    __ addl(acc, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15806
// OverflowAddL, register + register. The flags come from a real addq, which writes
// op1 (declared USE_KILL).
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register acc   = $op1$$Register;
    const Register other = $op2$$Register;
    __ addq(acc, other);
  %}
  ins_pipe(ialu_reg_reg);
%}
15818
// OverflowAddL, register + immL32 immediate. The flags come from a real addq, which
// writes op1 (declared USE_KILL).
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register acc = $op1$$Register;
    __ addq(acc, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15830
// OverflowSubI, register - register. Encoded as cmpl, so neither operand is written.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
15841
// OverflowSubI, register - int immediate. Encoded as cmpl against the constant.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15852
// OverflowSubL, register - register. Encoded as cmpq, so neither operand is written.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
15863
// OverflowSubL, register - immL32 immediate. Encoded as cmpq against the constant.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15874
// OverflowSubI with a zero left operand (int negation). Encoded as negl, which writes
// op2 (declared USE_KILL).
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2) %{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    const Register val = $op2$$Register;
    __ negl(val);
  %}
  ins_pipe(ialu_reg_reg);
%}
15886
// OverflowSubL with a zero left operand (long negation). Encoded as negq, which writes
// op2 (declared USE_KILL).
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) %{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    const Register val = $op2$$Register;
    __ negq(val);
  %}
  ins_pipe(ialu_reg_reg);
%}
15898
// OverflowMulI, register * register. Encoded as the two-operand imull, which writes
// op1 (declared USE_KILL).
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register acc   = $op1$$Register;
    const Register other = $op2$$Register;
    __ imull(acc, other);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15910
// OverflowMulI, register * int immediate. The three-operand imull writes the product
// into a TEMP register, so op1 is only read.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register product = $tmp$$Register;
    const Register factor  = $op1$$Register;
    __ imull(product, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15922
// OverflowMulL, register * register. Encoded as the two-operand imulq, which writes
// op1 (declared USE_KILL).
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register acc   = $op1$$Register;
    const Register other = $op2$$Register;
    __ imulq(acc, other);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15934
// OverflowMulL, register * immL32 immediate. The three-operand imulq writes the
// product into a TEMP register, so op1 is only read.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register product = $tmp$$Register;
    const Register factor  = $op1$$Register;
    __ imulq(product, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15946
15947
15948 //----------Control Flow Instructions------------------------------------------
15949 // Signed compare Instructions
15950
15951 // XXX more variants!!
// CmpI, register vs register, producing signed flags in cr.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15963
// CmpI, register vs int immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15974
// CmpI, register vs an int loaded from memory; the LoadI is folded into the cmpl
// memory operand.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15986
// CmpI of a register against the constant zero, encoded as testl of the register
// with itself.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    const Register val = $src$$Register;
    __ testl(val, val);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15997
// CmpI of (src AndI constant) against zero, folded into a single testl with the
// immediate mask.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    const Register val = $src$$Register;
    __ testl(val, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16008
// CmpI of (src1 AndI src2) against zero, folded into a single register/register testl.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero) %{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    const Register a = $src1$$Register;
    const Register b = $src2$$Register;
    __ testl(a, b);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16019
// CmpI of (src AndI LoadI mem) against zero, folded into testl with a memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    const Register val = $src$$Register;
    __ testl(val, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16030
16031 // Unsigned compare Instructions; really, same as signed except they
16032 // produce an rFlagsRegU instead of rFlagsReg.
// CmpU, register vs register. Same cmpl encoding as the signed form; the result
// operand is an rFlagsRegU.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16043
// CmpU, register vs int immediate; the result operand is an rFlagsRegU.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16054
// CmpU, register vs an int loaded from memory; the LoadI is folded into the cmpl
// memory operand.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16066
// CmpU of a register against the constant zero, encoded as testl of the register
// with itself.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    const Register val = $src$$Register;
    __ testl(val, val);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16077
// CmpP, pointer register vs pointer register, via a 64-bit cmpq into rFlagsRegU.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16088
// CmpP, pointer register vs a pointer loaded from memory. Matches only when the load
// carries no barrier data.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16101
16102 // XXX this is generalized by compP_rReg_mem???
16103 // Compare raw pointer (used in out-of-heap check).
16104 // Only works because non-oop pointers must be raw pointers
16105 // and raw pointers have no anti-dependencies.
// CmpP, pointer register vs a loaded pointer, restricted to loads whose address input
// has a raw-pointer bottom type and which carry no barrier data. No ins_cost is set
// for this variant.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16118
16119 // This will generate a signed flags result. This should be OK since
16120 // any compare to a zero should be eq/neq.
// CmpP of a pointer register against null (immP0), encoded as testq of the register
// with itself. The result operand is a signed rFlagsReg.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) %{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    const Register ptr = $src$$Register;
    __ testq(ptr, ptr);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16131
16132 // This will generate a signed flags result. This should be OK since
16133 // any compare to a zero should be eq/neq.
// CmpP of a loaded pointer against null (immP0), tested directly in memory. Used when
// compressed oops are off or the compressed-oops base is non-null, and the load
// carries no barrier data.
// NOTE(review): the encoding passes 0xFFFFFFFF while the format string prints a
// 64-bit all-ones mask; presumably the 32-bit immediate is sign-extended by testq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{ __ testq($op$$Address, 0xFFFFFFFF); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16147
// CmpP of a loaded pointer against null (immP0) when compressed oops are on with a
// null base: the memory word is compared against r12 (the format string notes
// R12_heapbase==0). Requires a load without barrier data.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpq(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16160
// CmpN, narrow-pointer register vs narrow-pointer register, via a 32-bit cmpl.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16169
// CmpN, narrow-pointer register vs a narrow pointer loaded from memory. Matches only
// when the load carries no barrier data.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    const Register lhs = $src$$Register;
    __ cmpl(lhs, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16181
// CmpN, narrow-pointer register vs an immN constant; the constant is cast to jobject
// and handed to cmp_narrow_oop.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmp_narrow_oop(lhs, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16191
// CmpN, an immN constant vs a narrow pointer loaded from memory; the memory-operand
// form of cmp_narrow_oop is used. Matches only when the load carries no barrier data.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16203
// CmpN, narrow-pointer register vs an immNKlass constant; the constant is cast to
// Klass* and handed to cmp_narrow_klass.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmp_narrow_klass(lhs, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16213
// CmpN, an immNKlass constant vs a narrow klass loaded from memory (LoadNKlass).
// Disabled when UseCompactObjectHeaders is set.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16225
// CmpN of a narrow-pointer register against immN0, encoded as testl of the register
// with itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{
    const Register narrow = $src$$Register;
    __ testl(narrow, narrow);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16233
// CmpN of a loaded narrow pointer against immN0, tested directly in memory. Used when
// the compressed-oops base is non-null and the load carries no barrier data.
//
// The encoding must be testl with an all-ones mask, as the format string states:
// that sets ZF exactly when the loaded 32-bit value is zero. A cmpl against
// 0xFFFFFFFF would instead set ZF only when the value equals 0xFFFFFFFF, which does
// not implement the matched comparison with zero.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16247
// CmpN of a loaded narrow pointer against immN0 when the compressed-oops base is
// null: the memory word is compared against r12 (the format string notes
// R12_heapbase==0). Requires a load without barrier data.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpl(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16260
16261 // Yanked all unsigned pointer compare operations.
16262 // Pointer compares are done with CmpP which is already unsigned.
16263
// CmpL, long register vs long register, via cmpq.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16274
// CmpL, long register vs an immL32 immediate, via cmpq.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16285
// CmpL, long register vs a long loaded from memory; the LoadL is folded into the cmpq
// memory operand.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16296
// CmpL of a long register against the constant zero, encoded as testq of the register
// with itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    const Register val = $src$$Register;
    __ testq(val, val);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16307
// CmpL of (src AndL immL32 constant) against zero, folded into a single testq with
// the immediate mask.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    const Register val = $src$$Register;
    __ testq(val, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16318
// Folds (src1 & src2) compared with zero into a single register-register
// testq.
// NOTE(review): the pipe class is ialu_cr_reg_imm although both operands are
// registers -- confirm whether ialu_cr_reg_reg was intended.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    __ testq($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16329
// Folds (src & LoadL(mem)) compared with zero into a testq with a memory
// operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16340
// Same as testL_reg_mem, but the register operand is a pointer viewed as a
// long (CastP2X src); the cast needs no code, so the pointer register is
// used directly in the testq.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16351
// Manifest a CmpU3 (three-way unsigned int compare) result in an integer
// register. Very painful. This is the test to avoid.
//
// Emitted sequence: cmpl; preload $dst with -1; if src1 is below src2
// (unsigned) keep the -1, otherwise setcc(notZero) overwrites $dst with the
// "not equal" bit. The flags register is clobbered (KILL flags).
//
// The format text is tagged CmpU3 (it previously said CmpL3) and the
// "done:" label is printed on its own line.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    // Unsigned "less than": keep the preloaded -1.
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16375
// Manifest a CmpL3 (three-way signed long compare) result in an integer
// register. Very painful. This is the test to avoid.
//
// Emitted sequence: cmpq; preload $dst with -1; if src1 is less than src2
// (signed) keep the -1, otherwise setcc(notZero) overwrites $dst with the
// "not equal" bit. The flags register is clobbered (KILL flags).
//
// The format text prints the "done:" label on its own line (it was
// previously glued onto the setcc line).
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    // Signed "less than": keep the preloaded -1.
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16399
// Manifest a CmpUL3 (three-way unsigned long compare) result in an integer
// register. Very painful. This is the test to avoid.
//
// Emitted sequence: cmpq; preload $dst with -1; if src1 is below src2
// (unsigned) keep the -1, otherwise setcc(notZero) overwrites $dst with the
// "not equal" bit. The flags register is clobbered (KILL flags).
//
// The format text is tagged CmpUL3 (it previously said CmpL3) and the
// "done:" label is printed on its own line.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    // Unsigned "less than": keep the preloaded -1.
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16423
// Unsigned long compare Instructions; really, same as signed long except they
// produce an rFlagsRegU instead of rFlagsReg.
//
// Register-register form: emits the same cmpq as compL_rReg; only the flags
// operand class differs.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16436
// Unsigned long compare (CmpUL) of a register against an immL32 constant;
// emits cmpq with an immediate and produces rFlagsRegU.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16447
// Unsigned long compare (CmpUL) of a register against a long loaded from
// memory; the LoadL is folded into the cmpq memory operand.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16458
// Unsigned long compare against zero (CmpUL src 0); emitted as testq of the
// register with itself, producing rFlagsRegU.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16469
// Compare a signed byte in memory against an immI8 constant without loading
// it into a register first: CmpI(LoadB mem, imm) becomes a single cmpb.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16479
// Bit-test an unsigned byte in memory: (LoadUB(mem) & imm) compared with zero
// becomes a single testb with an immU7 mask.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16489
// Bit-test a signed byte in memory: (LoadB(mem) & imm) compared with zero
// becomes a single testb with an immI8 mask.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16499
16500 //----------Max and Min--------------------------------------------------------
16501 // Min Instructions
16502
// Helper for minI_rReg (non-APX): conditional move "if greater, dst = src".
// It has no match rule of its own; it is only instantiated through the
// expand block of minI_rReg, which supplies the flags from a preceding
// compare of dst and src.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16514
// Helper for minI_rReg_ndd (APX): three-operand conditional move on
// "greater". It has no match rule of its own; it is only instantiated through
// the expand block of minI_rReg_ndd.
// NOTE(review): presumably ecmovl yields src2 when the condition holds and
// src1 otherwise, giving min(src1, src2) -- confirm against the assembler.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16526
// Integer minimum, two-operand form (non-APX): dst = min(dst, src).
// Expands into a compare of dst with src followed by cmovI_reg_g, which
// replaces dst with src when dst is greater.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    // Temporary flags operand connecting the compare to the cmov.
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
16539
// Integer minimum, three-operand APX form: dst = min(src1, src2).
// Expands into a compare of src1 with src2 followed by cmovI_reg_g_ndd.
// NOTE(review): Flag_ndd_demotable_opr1 presumably marks the node as
// convertible to the two-operand encoding when dst coincides with src1 --
// confirm against the flag's definition.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    // Temporary flags operand connecting the compare to the cmov.
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}
16554
// Helper for maxI_rReg (non-APX): conditional move "if less, dst = src".
// It has no match rule of its own; it is only instantiated through the
// expand block of maxI_rReg, which supplies the flags from a preceding
// compare of dst and src.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16566
// Helper for maxI_rReg_ndd (APX): three-operand conditional move on "less".
// It has no match rule of its own; it is only instantiated through the
// expand block of maxI_rReg_ndd.
// NOTE(review): presumably ecmovl yields src2 when the condition holds and
// src1 otherwise, giving max(src1, src2) -- confirm against the assembler.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16578
// Integer maximum, two-operand form (non-APX): dst = max(dst, src).
// Expands into a compare of dst with src followed by cmovI_reg_l, which
// replaces dst with src when dst is less.
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    // Temporary flags operand connecting the compare to the cmov.
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
16591
// Integer maximum, three-operand APX form: dst = max(src1, src2).
// Expands into a compare of src1 with src2 followed by cmovI_reg_l_ndd.
// NOTE(review): Flag_ndd_demotable_opr1 presumably marks the node as
// convertible to the two-operand encoding when dst coincides with src1 --
// confirm against the flag's definition.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    // Temporary flags operand connecting the compare to the cmov.
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
16606
16607 // ============================================================================
16608 // Branch Instructions
16609
// Unconditional direct jump (Goto), long form.
// The label is a relative address measured from JMP+1. The encoding always
// requests the long form so the instruction occupies the declared 5 bytes;
// jmpDir_short is the 2-byte replacement.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  size(5);
  ins_cost(300);
  format %{ "jmp $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // 'false': never shorten, the size(5) contract must hold.
    __ jmp(*target, false);
  %}
  ins_pipe(pipe_jmp);
%}
16625
// Conditional direct jump on signed-compare flags (If), long form.
// The label is a relative address measured from Jcc+1. The encoding always
// requests the long form so the instruction occupies the declared 6 bytes;
// jmpCon_short is the 2-byte replacement.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  size(6);
  ins_cost(300);
  format %{ "j$cop $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // 'false': never shorten, the size(6) contract must hold.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16641
// Conditional back-branch closing a counted loop (CountedLoopEnd), long form.
// The encoding is the same as jmpCon's; only the matched ideal node and the
// "# loop end" annotation differ. Always emitted as the 6-byte long form;
// jmpLoopEnd_short is the 2-byte replacement.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(6);
  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    // 'false': never shorten, the size(6) contract must hold.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16657
// Conditional direct jump on unsigned-compare flags (cmpOpU / rFlagsRegU),
// long form. Always emitted as the 6-byte long form; jmpConU_short is the
// 2-byte replacement.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(6);
  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // 'false': never shorten, the size(6) contract must hold.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16672
// Conditional direct jump for the cmpOpUCF / rFlagsRegUCF operand classes,
// long form: a single jcc on the operand's condition code. Costed at 200,
// below the 300 used by jmpCon and jmpConU. Always emitted as the 6-byte
// long form; jmpConUCF_short is the 2-byte replacement.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(6);
  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // 'false': never shorten, the size(6) contract must hold.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16686
// Conditional direct jump for the cmpOpUCF2 operand class, long form.
// Only the equal and notEqual conditions are supported, and each needs two
// branches because the parity flag is consulted as well:
//   notEqual: jump to the target on parity, or on notEqual.
//   equal:    skip over the branch on parity, otherwise jump on equal.
// NOTE(review): parity presumably signals an unordered (NaN) floating-point
// compare here -- confirm against the compare rules producing rFlagsRegUCF.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    const int cond = $cop$$cmpcode;
    if (cond == Assembler::equal) {
      // Parity set: fall through without taking the branch.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jcc(Assembler::equal, *target, false);
      __ bind(skip);
    } else if (cond == Assembler::notEqual) {
      // Parity set also takes the branch.
      __ jcc(Assembler::parity, *target, false);
      __ jcc(Assembler::notEqual, *target, false);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16718
// Conditional direct jump for the cmpOpUCFE / rFlagsRegUCFE operand classes
// (flags usable with both signed and unsigned conditions, per the original
// comment), long form. Always emitted as the 6-byte long form;
// jmpConUCFE_short is the 2-byte replacement.
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(6);
  ins_cost(200);
  format %{ "j$cop,su $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // 'false': never shorten, the size(6) contract must hold.
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16733
16734 // ============================================================================
16735 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16736 // superklass array for an instance of the superklass. Set a hidden
16737 // internal cache on a hit (cache is checked with exposed code in
16738 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16739 // encoding ALSO sets flags.
16740
// Linear-scan variant of the partial subtype check, used when
// UseSecondarySupersTable is off. Operands are pinned to fixed registers
// (result in rdi, sub in rsi, super in rax, scratch in rcx); rcx and the
// flags are killed. On a hit $result is zeroed; on a miss control reaches
// the 'miss' label with $result left nonzero by the slow-path helper.
//
// The format listing marks the "Hit: rdi zero" annotation with '#', like the
// other annotated lines (the marker was previously missing).
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t# Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Falling through means a hit: report it as zero.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
16774
16775 // ============================================================================
16776 // Two versions of hashtable-based partialSubtypeCheck, both used when
16777 // we need to search for a super class in the secondary supers array.
16778 // The first is used when we don't know _a priori_ the class being
16779 // searched for. The second, far more common, is used when we do know:
16780 // this is used for instanceof, checkcast, and any case where C2 can
16781 // determine it by constant propagation.
16782
// Hashtable-based partial subtype check for a super class that is not a
// compile-time constant. Selected when UseSecondarySupersTable is on.
// sub, super and result are pinned to rsi, rax and rdi; rdx, rcx, rbx and r11
// are reserved as temporaries and the flags are killed. All work is done by
// lookup_secondary_supers_table_var.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(1000);
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
16801
// Hashtable-based partial subtype check for a super class known at compile
// time (matched as Binary super_reg super_con). Selected when
// UseSecondarySupersTable is on. The constant klass's hash slot is computed
// while emitting code; the lookup is then either expanded inline
// (InlineSecondarySupersTest) or performed by calling the per-slot stub.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
  ins_cost(700);  // lower than partialSubtypeCheckVarSuper (1000)

  ins_encode %{
    const u1 slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      // Out of line: one stub per hash slot.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(slot)));
    } else {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             slot);
    }
  %}

  ins_pipe(pipe_slow);
%}
16826
16827 // ============================================================================
16828 // Branch Instructions -- short offset versions
16829 //
16830 // These instructions are used to replace jumps of a long offset (the default
16831 // match) with jumps of a shorter offset. These instructions are all tagged
16832 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16833 // match rules in general matching. Instead, the ADLC generates a conversion
16834 // method in the MachNode which can be used to do in-place replacement of the
16835 // long variant with the shorter variant. The compiler will determine if a
16836 // branch can be taken by the is_short_branch_offset() predicate in the machine
16837 // specific code section of the file.
16838
// Unconditional direct jump (Goto), 2-byte short-offset form.
// Tagged with ins_short_branch, so it is not chosen by general matching; it
// replaces jmpDir in place when the offset fits.
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "jmp,s $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
16854
// Conditional direct jump on signed-compare flags, 2-byte short-offset form.
// Tagged with ins_short_branch, so it is not chosen by general matching; it
// replaces jmpCon in place when the offset fits.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,s $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16870
// Conditional back-branch closing a counted loop, 2-byte short-offset form.
// Tagged with ins_short_branch, so it is not chosen by general matching; it
// replaces jmpLoopEnd in place when the offset fits.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,s $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16886
// Conditional direct jump on unsigned-compare flags, 2-byte short-offset
// form. Tagged with ins_short_branch, so it is not chosen by general
// matching; it replaces jmpConU in place when the offset fits.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16902
// Conditional direct jump for the cmpOpUCF / rFlagsRegUCF operand classes,
// 2-byte short-offset form. Tagged with ins_short_branch, so it is not chosen
// by general matching; it replaces jmpConUCF in place when the offset fits.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16917
// Conditional direct jump for the cmpOpUCF2 operand class, short-offset form:
// two 2-byte branches, 4 bytes in total. Only equal and notEqual are
// supported, and the parity flag is consulted as well:
//   notEqual: jump to the target on parity, or on notEqual.
//   equal:    skip over the branch on parity, otherwise jump on equal.
// Tagged with ins_short_branch, so it is not chosen by general matching; it
// replaces jmpConUCF2 in place when the offset fits.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* target = $labl$$label;
    const int cond = $cop$$cmpcode;
    if (cond == Assembler::equal) {
      // Parity set: fall through without taking the branch.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jccb(Assembler::equal, *target);
      __ bind(skip);
    } else if (cond == Assembler::notEqual) {
      // Parity set also takes the branch.
      __ jccb(Assembler::parity, *target);
      __ jccb(Assembler::notEqual, *target);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16951
// Conditional direct jump for the cmpOpUCFE / rFlagsRegUCFE operand classes
// (flags usable with both signed and unsigned conditions, per the original
// comment), 2-byte short-offset form. Tagged with ins_short_branch, so it is
// not chosen by general matching; it replaces jmpConUCFE in place when the
// offset fits.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,sus $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16967
16968 // ============================================================================
16969 // inlined locking and unlocking
16970
// Inlined monitor enter (FastLock); the outcome is delivered in the flags
// register cr. The box operand is pinned to rbx and is consumed and destroyed
// (USE_KILL); rax and $tmp are scratch. The current thread is passed as
// r15_thread. All work is delegated to MacroAssembler::fast_lock.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16981
// Inlined monitor exit (FastUnlock); the outcome is delivered in the flags
// register cr. The second FastUnlock input is pinned to rax and is consumed
// and destroyed (USE_KILL); $tmp is scratch. The current thread is passed as
// r15_thread. All work is delegated to MacroAssembler::fast_unlock.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16992
16993
16994 // ============================================================================
16995 // Safepoint Instructions
// Safepoint poll: reads the word addressed by $poll with a testl against rax.
// Only the memory access matters; the flags are clobbered (KILL cr).
// A poll_type relocation is recorded immediately before the instruction, and
// debug builds assert that the emitted bytes are recognized as a safepoint
// poll.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    // The relocation must be recorded before the instruction it describes.
    __ relocate(relocInfo::poll_type);
    // Remember where the poll starts so the assert can inspect it.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17012
// MaskAll with a long source: fills the opmask register $dst from $src for a
// mask of Matcher::vector_length(this) lanes, via vector_maskall_operation.
instruct mask_all_evexL(kReg dst, rRegL src)
%{
  match(Set dst (MaskAll src));

  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lanes);
  %}
  ins_pipe(pipe_slow);
%}
17022
// MaskAll with an int source for masks longer than 32 lanes: the int is
// first sign-extended into the 64-bit temporary (movslq), which then feeds
// vector_maskall_operation.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp)
%{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP tmp);

  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    // Widen the int source to 64 bits before building the mask.
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, lanes);
  %}
  ins_pipe(pipe_slow);
%}
17035
17036 // ============================================================================
17037 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
//
// Encoded by chaining three encoding classes: clear_avx, Java_Static_Call
// and call_epilog. The instruction requests 4-byte alignment via
// ins_alignment(4).
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17052
// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
//
// Per the format text, rax is loaded with Universe::non_oop_word() before the
// call. Encoded by chaining clear_avx, Java_Dynamic_Call and call_epilog;
// the instruction requests 4-byte alignment via ins_alignment(4).
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17068
// Call Runtime Instruction
// Matches CallRuntime; encoded as clear_avx followed by Java_To_Runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17080
// Call runtime without safepoint
// Matches CallLeaf; uses the same encoding as CallRuntimeDirect (clear_avx
// followed by Java_To_Runtime).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17092
// Call runtime without safepoint and with vector arguments
// Unlike the other runtime-call rules, the encoding omits clear_avx.
// NOTE(review): presumably because clearing the upper vector state would
// destroy the vector arguments -- confirm against the clear_avx encoding.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17104
// Call runtime without safepoint
// entry point is null, target holds the address to call
//
// Indirect form of CallLeafNoFP: selected when the call node has no static
// entry point, so the destination comes from the $target register.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
     __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17120
// Call runtime without safepoint
// Direct form of CallLeafNoFP: selected when the call node has a static
// entry point. Encoded as clear_avx followed by Java_To_Runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17133
// Return Instruction
// Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching
// NOTE(review): the encoding below emits only ret(0); no nop is emitted by
// this rule itself -- confirm whether the notice above is still accurate.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    // ret with a zero immediate: pops nothing beyond the return address.
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
17148
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall which has reset rbp to the caller state.
//
// method_ptr is pinned to rbx and is not referenced by the encoding; it only
// has to be live in rbx when the jump is taken.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  format %{ "jmp $jump_target\t# rbx holds method" %}
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17166
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
//
// The return address is popped into rdx, then control transfers to
// $jump_target. ex_oop is pinned to rax and is not referenced by the
// encoding.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq rdx\t# pop return address\n\t"
            "jmp $jump_target" %}
  ins_encode %{
    // Discard the return address by popping it into rdx.
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17182
// Forward exception.
// Jumps (does not call) to StubRoutines::forward_exception_entry(); noreg is
// passed as the scratch register argument of jump().
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17194
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
//
// The rule only tells the register allocator that the exception oop lives in
// rax: size(0) and an empty encoding.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
17208
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
//
// The target is OptoRuntime::rethrow_stub(); noreg is passed as the scratch
// register argument of jump().
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp rethrow_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17223
17224 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
//
// ThreadLocal is satisfied without emitting any code: the destination is
// pinned to r15, which already holds the thread pointer (see the format
// text), hence size(0) and the empty encoding.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17237
// Float add, two-operand SSE form (UseAVX == 0): dst = dst + src via addss.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17249
// Float add, two-operand SSE form (UseAVX == 0) with the second operand
// loaded from memory: the LoadF is folded into the addss memory operand.
instruct addF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17261
// Float add, two-operand SSE form (UseAVX == 0) with a float constant: the
// constant is read from the constant table through $constantaddress.
instruct addF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17272
// Float add, three-operand AVX form (UseAVX > 0): dst = src1 + src2 via
// vaddss, so neither source has to be overwritten.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17284
// Float add, three-operand AVX form (UseAVX > 0) with the second operand
// loaded from memory: the LoadF is folded into the vaddss memory operand.
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17296
// Float add, three-operand AVX form (UseAVX > 0) with a float constant: the
// constant is read from the constant table through $constantaddress.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17308
// Double add, two-operand SSE form (UseAVX == 0): dst = dst + src via addsd.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17320
// Double add, two-operand SSE form (UseAVX == 0) with the second operand
// loaded from memory: the LoadD is folded into the addsd memory operand.
instruct addD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17332
// Double add, two-operand SSE form (UseAVX == 0) with a double constant: the
// constant is read from the constant table through $constantaddress.
instruct addD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17343
// AddD, AVX three-operand register form (UseAVX > 0): dst = src1 + src2.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (AddD src1 src2));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vaddsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17355
// AddD, AVX form with the second addend loaded from memory (UseAVX > 0).
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (AddD src1 (LoadD src2)));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vaddsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17367
// AddD, AVX form with a double constant read from the constant table (UseAVX > 0).
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (AddD src con));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vaddsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17379
// SubF, SSE two-operand register form (UseAVX == 0): dst = dst - src.
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "subss $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ subss(accum, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17391
// SubF, SSE form with the subtrahend loaded from memory (UseAVX == 0): dst = dst - [src].
instruct subF_mem(regF dst, memory src) %{
  match(Set dst (SubF dst (LoadF src)));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "subss $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ subss(accum, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17403
// SubF, SSE form with a float constant read from the constant table (UseAVX == 0).
instruct subF_imm(regF dst, immF con) %{
  match(Set dst (SubF dst con));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ subss(accum, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17414
// SubF, AVX three-operand register form (UseAVX > 0): dst = src1 - src2.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (SubF src1 src2));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vsubss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17426
// SubF, AVX form with the subtrahend loaded from memory (UseAVX > 0).
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (SubF src1 (LoadF src2)));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vsubss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17438
// SubF, AVX form with a float constant read from the constant table (UseAVX > 0).
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (SubF src con));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vsubss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17450
// SubD, SSE two-operand register form (UseAVX == 0): dst = dst - src.
instruct subD_reg(regD dst, regD src) %{
  match(Set dst (SubD dst src));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "subsd $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ subsd(accum, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17462
// SubD, SSE form with the subtrahend loaded from memory (UseAVX == 0): dst = dst - [src].
instruct subD_mem(regD dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "subsd $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ subsd(accum, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17474
// SubD, SSE form with a double constant read from the constant table (UseAVX == 0).
instruct subD_imm(regD dst, immD con) %{
  match(Set dst (SubD dst con));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ subsd(accum, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17485
// SubD, AVX three-operand register form (UseAVX > 0): dst = src1 - src2.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (SubD src1 src2));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vsubsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17497
// SubD, AVX form with the subtrahend loaded from memory (UseAVX > 0).
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (SubD src1 (LoadD src2)));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vsubsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17509
// SubD, AVX form with a double constant read from the constant table (UseAVX > 0).
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (SubD src con));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vsubsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17521
// MulF, SSE two-operand register form (UseAVX == 0): dst = dst * src.
instruct mulF_reg(regF dst, regF src) %{
  match(Set dst (MulF dst src));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "mulss $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ mulss(accum, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17533
// MulF, SSE form with the multiplier loaded from memory (UseAVX == 0): dst = dst * [src].
instruct mulF_mem(regF dst, memory src) %{
  match(Set dst (MulF dst (LoadF src)));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "mulss $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ mulss(accum, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17545
// MulF, SSE form with a float constant read from the constant table (UseAVX == 0).
instruct mulF_imm(regF dst, immF con) %{
  match(Set dst (MulF dst con));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ mulss(accum, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17556
// MulF, AVX three-operand register form (UseAVX > 0): dst = src1 * src2.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (MulF src1 src2));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vmulss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17568
// MulF, AVX form with the second factor loaded from memory (UseAVX > 0).
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (MulF src1 (LoadF src2)));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vmulss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17580
// MulF, AVX form with a float constant read from the constant table (UseAVX > 0).
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (MulF src con));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vmulss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17592
// MulD, SSE two-operand register form (UseAVX == 0): dst = dst * src.
instruct mulD_reg(regD dst, regD src) %{
  match(Set dst (MulD dst src));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ mulsd(accum, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17604
// MulD, SSE form with the multiplier loaded from memory (UseAVX == 0): dst = dst * [src].
instruct mulD_mem(regD dst, memory src) %{
  match(Set dst (MulD dst (LoadD src)));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ mulsd(accum, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17616
// MulD, SSE form with a double constant read from the constant table (UseAVX == 0).
instruct mulD_imm(regD dst, immD con) %{
  match(Set dst (MulD dst con));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ mulsd(accum, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17627
// MulD, AVX three-operand register form (UseAVX > 0): dst = src1 * src2.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (MulD src1 src2));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vmulsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17639
// MulD, AVX form with the second factor loaded from memory (UseAVX > 0).
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (MulD src1 (LoadD src2)));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vmulsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17651
// MulD, AVX form with a double constant read from the constant table (UseAVX > 0).
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (MulD src con));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vmulsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17663
// DivF, SSE two-operand register form (UseAVX == 0): dst = dst / src.
instruct divF_reg(regF dst, regF src) %{
  match(Set dst (DivF dst src));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "divss $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ divss(accum, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17675
// DivF, SSE form with the divisor loaded from memory (UseAVX == 0): dst = dst / [src].
instruct divF_mem(regF dst, memory src) %{
  match(Set dst (DivF dst (LoadF src)));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "divss $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ divss(accum, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17687
// DivF, SSE form with a float constant divisor read from the constant table (UseAVX == 0).
instruct divF_imm(regF dst, immF con) %{
  match(Set dst (DivF dst con));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ divss(accum, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17698
// DivF, AVX three-operand register form (UseAVX > 0): dst = src1 / src2.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (DivF src1 src2));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vdivss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17710
// DivF, AVX form with the divisor loaded from memory (UseAVX > 0).
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (DivF src1 (LoadF src2)));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vdivss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17722
// DivF, AVX form with a float constant divisor read from the constant table (UseAVX > 0).
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (DivF src con));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vdivss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17734
// DivD, SSE two-operand register form (UseAVX == 0): dst = dst / src.
instruct divD_reg(regD dst, regD src) %{
  match(Set dst (DivD dst src));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "divsd $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ divsd(accum, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17746
// DivD, SSE form with the divisor loaded from memory (UseAVX == 0): dst = dst / [src].
instruct divD_mem(regD dst, memory src) %{
  match(Set dst (DivD dst (LoadD src)));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "divsd $dst, $src" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ divsd(accum, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17758
// DivD, SSE form with a double constant divisor read from the constant table (UseAVX == 0).
instruct divD_imm(regD dst, immD con) %{
  match(Set dst (DivD dst con));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ divsd(accum, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17769
// DivD, AVX three-operand register form (UseAVX > 0): dst = src1 / src2.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (DivD src1 src2));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vdivsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17781
// DivD, AVX form with the divisor loaded from memory (UseAVX > 0).
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (DivD src1 (LoadD src2)));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vdivsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17793
// DivD, AVX form with a double constant divisor read from the constant table (UseAVX > 0).
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (DivD src con));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vdivsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17805
// AbsF in place, SSE form (UseAVX == 0): ANDs dst with the float_signmask() constant.
instruct absF_reg(regF dst) %{
  match(Set dst (AbsF dst));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andps(value, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17816
// AbsF, AVX form (UseAVX > 0): dst = src AND float_signmask(), 128-bit encoding.
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  match(Set dst (AbsF src));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    int vector_len = Assembler::AVX_128bit;
    __ vandps(result, input, ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
17829
// AbsD in place, SSE form (UseAVX == 0): ANDs dst with the double_signmask() constant.
instruct absD_reg(regD dst) %{
  match(Set dst (AbsD dst));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andpd(value, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17841
// AbsD, AVX form (UseAVX > 0): dst = src AND double_signmask(), 128-bit encoding.
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  match(Set dst (AbsD src));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    int vector_len = Assembler::AVX_128bit;
    __ vandpd(result, input, ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
17855
// NegF in place, SSE form (UseAVX == 0): XORs dst with the float_signflip() constant.
instruct negF_reg(regF dst) %{
  match(Set dst (NegF dst));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorps(value, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17866
// NegF, AVX form (UseAVX > 0): vnegatess of src into dst using the float_signflip() constant.
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  match(Set dst (NegF src));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatess(result, input, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17878
// NegD in place, SSE form (UseAVX == 0): XORs dst with the double_signflip() constant.
instruct negD_reg(regD dst) %{
  match(Set dst (NegD dst));
  predicate(UseAVX == 0);
  ins_cost(150);

  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorpd(value, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17890
// NegD, AVX form (UseAVX > 0): vnegatesd of src into dst using the double_signflip() constant.
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  match(Set dst (NegD src));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatesd(result, input, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17903
// SqrtF. For best performance sqrtss wants its destination register to be
// initialized beforehand, so the only rule provided is the in-place one where
// the input already lives in dst.
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));

  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtss(value, value);
  %}
  ins_pipe(pipe_slow);
%}
17914
// SqrtD. For best performance sqrtsd wants its destination register to be
// initialized beforehand, so the only rule provided is the in-place one where
// the input already lives in dst.
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));

  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtsd(value, value);
  %}
  ins_pipe(pipe_slow);
%}
17925
// ConvF2HF into a general register via flt_to_flt16, with tmp reserved as a
// temporary XMM register.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);
  ins_cost(125);

  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    Register    half    = $dst$$Register;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister scratch = $tmp$$XMMRegister;
    __ flt_to_flt16(half, input, scratch);
  %}
  ins_pipe( pipe_slow );
%}
17936
// ConvF2HF fused with the StoreC of its result; requires UseAVX > 2 and AVX512VL.
// The value 0x1 is moved through rtmp into the opmask ktmp, which then masks
// the evcvtps2ph store.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  match(Set mem (StoreC mem (ConvF2HF src)));
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);

  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register  scratch = $rtmp$$Register;
    KRegister opmask  = $ktmp$$KRegister;
    __ movl(scratch, 0x1);
    __ kmovwl(opmask, scratch);
    __ evcvtps2ph($mem$$Address, opmask, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
17949
// VectorCastF2HF, register to register; the vector length encoding is taken
// from the src operand.
instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));

  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this, $src);
    XMMRegister halves = $dst$$XMMRegister;
    XMMRegister floats = $src$$XMMRegister;
    __ vcvtps2ph(halves, floats, 0x04, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
17959
// VectorCastF2HF fused with the StoreVector of its result; applies only when
// the store covers at least 16 bytes.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  predicate(n->as_StoreVector()->memory_size() >= 16);

  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this, $src);
    XMMRegister floats = $src$$XMMRegister;
    __ vcvtps2ph($mem$$Address, floats, 0x04, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
17970
// ConvHF2F from a general register into an XMM register via flt16_to_flt.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));

  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    half   = $src$$Register;
    __ flt16_to_flt(result, half);
  %}
  ins_pipe( pipe_slow );
%}
17979
// VectorCastHF2F fused with the LoadVector of its input.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));

  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    XMMRegister floats = $dst$$XMMRegister;
    __ vcvtph2ps(floats, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
17989
// VectorCastHF2F, register to register.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);

  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    XMMRegister floats = $dst$$XMMRegister;
    XMMRegister halves = $src$$XMMRegister;
    __ vcvtph2ps(floats, halves, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18000
18001 // ---------------------------------------- VectorReinterpret ------------------------------------
// VectorReinterpret of a predicate-register mask whose lane count is unchanged.
// Input and output share the same register, so no code is emitted.
instruct reinterpret_mask(kReg dst) %{
  match(Set dst (VectorReinterpret dst));
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1)));
  ins_cost(125);

  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe( pipe_slow );
%}
18013
// VectorReinterpret of a T_SHORT predicate mask as a T_BYTE predicate mask
// with a different lane count (UseAVX > 2). The mask is expanded into the
// vector temporary with evpmovm2w and converted back with evpmovb2m.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");

    int vector_len = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2w(lanes, $src$$KRegister, vector_len);
    __ evpmovb2m($dst$$KRegister, lanes, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18033
// VectorReinterpret of a T_INT or T_FLOAT predicate mask as a T_BYTE predicate
// mask with a different lane count (UseAVX > 2). The mask is expanded into
// the vector temporary with evpmovm2d and converted back with evpmovb2m.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");

    int vector_len = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2d(lanes, $src$$KRegister, vector_len);
    __ evpmovb2m($dst$$KRegister, lanes, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18054
// VectorReinterpret of a T_LONG or T_DOUBLE predicate mask as a T_BYTE
// predicate mask with a different lane count (UseAVX > 2). The mask is expanded
// into the vector temporary with evpmovm2q and converted back with evpmovb2m.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");

    int vector_len = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2q(lanes, $src$$KRegister, vector_len);
    __ evpmovb2m($dst$$KRegister, lanes, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18075
// VectorReinterpret of a non-mask vector whose size in bytes is unchanged.
// Input and output share the same register, so no code is emitted.
instruct reinterpret(vec dst) %{
  match(Set dst (VectorReinterpret dst));
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  ins_cost(125);

  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe( pipe_slow );
%}
18087
// VectorReinterpret to a larger vector, SSE form (UseAVX == 0, src smaller
// than dst). dst is first loaded with the 32-bit or 64-bit mask constant that
// matches the source size and then ANDed with src.
instruct reinterpret_expand(vec dst, vec src) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  effect(TEMP dst);
  ins_cost(125);

  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    XMMRegister widened = $dst$$XMMRegister;
    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes != 4) {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu(widened, ExternalAddress(vector_64_bit_mask()), noreg);
    } else {
      __ movdqu(widened, ExternalAddress(vector_32_bit_mask()), noreg);
    }
    __ pand(widened, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18110
// VectorReinterpret of a 4-byte non-mask vector to a larger one, AVX form
// (UseAVX > 0): dst = src AND the 32-bit mask constant.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  ins_cost(125);

  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    __ vpand(widened, narrow, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}
18124
18125
// VectorReinterpret of a non-mask vector wider than 4 bytes to a larger one,
// AVX form (UseAVX > 0). The move instruction is chosen by the source size
// in bytes (8, 16 or 32); any other size is a fatal error.
instruct vreinterpret_expand(legVec dst, vec src) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  ins_cost(125);

  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes == 8) {
      __ movq(widened, narrow);
    } else if (src_bytes == 16) {
      __ movdqu(widened, narrow);
    } else if (src_bytes == 32) {
      __ vmovdqu(widened, narrow);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18144
// VectorReinterpret of a non-mask vector to a smaller one (src larger than
// dst). The move instruction is chosen by the destination size in bytes
// (4, 8, 16 or 32); any other size is a fatal error.
instruct reinterpret_shrink(vec dst, legVec src) %{
  match(Set dst (VectorReinterpret src));
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n));
  ins_cost(125);

  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_encode %{
    XMMRegister narrow = $dst$$XMMRegister;
    XMMRegister wide   = $src$$XMMRegister;
    switch (Matcher::vector_length_in_bytes(this)) {
      case 4:
        __ movfltz(narrow, wide);
        break;
      case 8:
        __ movq(narrow, wide);
        break;
      case 16:
        __ movdqu(narrow, wide);
        break;
      case 32:
        __ vmovdqu(narrow, wide);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18162
18163 // ----------------------------------------------------------------------------------------------------
18164
// RoundDoubleMode, register source. Asserts UseSSE >= 4. Without AVX, and
// when dst and src are different registers, dst is zeroed with pxor before
// the roundsd.
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  ins_cost(150);

  format %{ "roundsd $dst,$src" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (UseAVX == 0) {
      if (result != input) {
        __ pxor(result, result);
      }
    }
    __ roundsd(result, input, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
18178
// RoundDoubleMode of a double constant read from the constant table.
// Asserts UseSSE >= 4.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  ins_cost(150);

  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ roundsd(result, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18189
// RoundDoubleModeV for vectors of fewer than 8 lanes, register source.
// Asserts UseAVX > 0.
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV src rmode));
  predicate(Matcher::vector_length(n) < 8);

  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vroundpd(result, input, $rmode$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18201
// RoundDoubleModeV for 8-lane vectors, register source, using vrndscalepd
// with the 512-bit encoding. Asserts UseAVX > 2.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV src rmode));
  predicate(Matcher::vector_length(n) == 8);

  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vrndscalepd(result, input, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18212
// RoundDoubleModeV for vectors of fewer than 8 lanes, fused with the
// LoadVector of its input. Asserts UseAVX > 0.
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  predicate(Matcher::vector_length(n) < 8);

  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vroundpd(result, $mem$$Address, $rmode$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18224
// RoundDoubleModeV for 8-lane vectors, fused with the LoadVector of its input,
// using vrndscalepd with the 512-bit encoding. Asserts UseAVX > 2.
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  predicate(Matcher::vector_length(n) == 8);

  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ vrndscalepd(result, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18235
// OnSpinWait: emits a single pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
18249
// FmaD: c = a * b + c, with c serving as both the addend and the result.
// Asserts UseFMA.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));
  ins_cost(150);

  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister addend       = $c$$XMMRegister;
    XMMRegister multiplicand = $a$$XMMRegister;
    XMMRegister multiplier   = $b$$XMMRegister;
    __ fmad(addend, multiplicand, multiplier, addend);
  %}
  ins_pipe( pipe_slow );
%}
18261
// FmaF: c = a * b + c, with c serving as both the addend and the result.
// Asserts UseFMA.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));
  ins_cost(150);

  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister addend       = $c$$XMMRegister;
    XMMRegister multiplicand = $a$$XMMRegister;
    XMMRegister multiplier   = $b$$XMMRegister;
    __ fmaf(addend, multiplicand, multiplier, addend);
  %}
  ins_pipe( pipe_slow );
%}
18273
18274 // ====================VECTOR INSTRUCTIONS=====================================
18275
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// This one moves a 'vec' operand into a 'legVec' operand. It has an empty
// format and its encoding must never be reached (ShouldNotReachHere).
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18285
// Dummy move of a 'legVec' operand into a 'vec' operand. It has an empty
// format and its encoding must never be reached (ShouldNotReachHere).
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18294
18295 // ============================================================================
18296
// Generic vector load: matches LoadVector for any 'vec' destination and
// delegates to load_vector with the node's element type and size in bytes.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_vector(elem_bt, $dst$$XMMRegister, $mem$$Address, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18308
// Generic vector store: matches StoreVector for any 'vec' source and picks
// the store instruction from the size in bytes of the stored vector.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    // Size of the source vector operand, not of this (memory-producing) node.
    const uint size_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (size_in_bytes == 4) {
      __ movdl($mem$$Address, $src$$XMMRegister);
    } else if (size_in_bytes == 8) {
      __ movq($mem$$Address, $src$$XMMRegister);
    } else if (size_in_bytes == 16) {
      __ movdqu($mem$$Address, $src$$XMMRegister);
    } else if (size_in_bytes == 32) {
      __ vmovdqu($mem$$Address, $src$$XMMRegister);
    } else if (size_in_bytes == 64) {
      __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18326
18327 // ---------------------------------------- Gather ------------------------------------
18328
18329 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18330
// LoadVectorGather for non-subword element types on targets without
// AVX512VL, for vectors of at most 32 bytes. Uses a vector register mask
// ($mask) and a pointer temp ($tmp) holding the base address.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    assert(!is_subword_type(bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Comparing $mask with itself for equality sets every bit of the mask.
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, enc);
    // Materialize the base address described by $mem in $tmp.
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
18347
18348
// LoadVectorGather for non-subword element types when AVX512VL is available
// or the vector is 64 bytes long. Uses an opmask temp ($ktmp) and a pointer
// temp ($tmp) holding the base address.
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  // The format names the actual $ktmp operand (previously the literal text "ktmp").
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // XNOR of $ktmp with itself sets the mask bits, i.e. an unmasked gather.
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    // Materialize the base address described by $mem in $tmp.
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18364
// LoadVectorGatherMasked for non-subword element types when AVX512VL is
// available or the vector is 64 bytes long. $mask is the predicate supplied
// by the ideal graph; $ktmp receives a copy of it and $tmp the base address.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  // The format names the actual $ktmp operand (previously the literal text "ktmp").
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Clear $dst (xor with itself) before the masked gather writes into it.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18385
// LoadVectorGather for sub-word element types when the vector is at most
// 8 bytes long. $idx_base is a pointer operand; $tmp receives the base
// address of $mem and $rtmp is scratch for the vgather8b helper.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // Materialize the base address described by $mem in $tmp.
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather8b(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18399
// LoadVectorGather for sub-word element types when the vector is longer
// than 8 bytes. The index base pointer is copied into $idx_base_temp before
// being handed to vgather_subword; no mask registers are used (noreg).
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int num_elems = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    // Base address of $mem goes to $tmp; the helper gets a copy of the index
    // base in a TEMP register rather than the input operand itself.
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vgather_subword(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, num_elems, enc);
  %}
  ins_pipe( pipe_slow );
%}
18417
// LoadVectorGatherMasked for sub-word element types, vectors of at most
// 8 bytes, on targets with AVX512BW. The opmask is copied into a general
// register ($rtmp2) and $mask_idx is zeroed before calling vgather8b_masked.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // Zero the mask index (xor with itself).
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    // Base address of $mem goes to $tmp.
    __ lea($tmp$$Register, $mem$$Address);
    // Move the opmask bits into a general-purpose register for the helper.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather8b_masked(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18433
// LoadVectorGatherMasked for sub-word element types, vectors longer than
// 8 bytes, on targets with AVX512BW. The opmask is copied into a general
// register ($rtmp2) and $mask_idx is zeroed before calling vgather_subword.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int num_elems = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    // Zero the mask index (xor with itself).
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    // Base address of $mem goes to $tmp; the index base is copied to a TEMP.
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    // Move the opmask bits into a general-purpose register for the helper.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather_subword(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, num_elems, enc);
  %}
  ins_pipe( pipe_slow );
%}
18453
// LoadVectorGatherMasked for sub-word element types, vectors of at most
// 8 bytes, on targets without AVX512VL+BW. The mask lives in a vector
// register and is converted to a bit mask in $rtmp2 via vpmovmskb.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // Base address of $mem goes to $tmp.
    __ lea($tmp$$Register, $mem$$Address);
    // Collect the vector mask into a general-purpose bit mask.
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, enc);
    if (bt == T_SHORT) {
      // For shorts, keep only the even-numbered bits (selector 0x55555555),
      // using $mask_idx as a scratch register for the selector.
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // Zero the mask index (xor with itself) before handing it to the helper.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18473
// LoadVectorGatherMasked for sub-word element types, vectors longer than
// 8 bytes, on targets without AVX512VL+BW. The mask lives in a vector
// register and is converted to a bit mask in $rtmp2 via vpmovmskb.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int num_elems = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    // Base address of $mem goes to $tmp; the index base is copied to a TEMP.
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    // Collect the vector mask into a general-purpose bit mask.
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, enc);
    if (bt == T_SHORT) {
      // For shorts, keep only the even-numbered bits (selector 0x55555555),
      // using $mask_idx as a scratch register for the selector.
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // Zero the mask index (xor with itself) before handing it to the helper.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, num_elems, enc);
  %}
  ins_pipe( pipe_slow );
%}
18497
18498 // ====================Scatter=======================================
18499
18500 // Scatter INT, LONG, FLOAT, DOUBLE
18501
// StoreVectorScatter for non-subword element types (requires UseAVX > 2).
// $ktmp is loaded from the vector_all_bits_set() constant so that the
// scatter is effectively unmasked; $tmp receives the base address of $mem.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  // The format names the allocated $ktmp operand (previously a hard-coded "k2").
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    // Load the opmask temp from the all-bits-set constant.
    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    // Materialize the base address described by $mem in $tmp.
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18520
// StoreVectorScatterMasked for non-subword element types. $mask is the
// predicate supplied by the ideal graph; $ktmp receives a copy of it and
// $tmp the base address of $mem.
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  // The format lists the TEMP operands like the other gather/scatter rules
  // (previously it ended in a dangling "\t!").
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Materialize the base address described by $mem in $tmp.
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18538
18539 // ====================REPLICATE=======================================
18540
// Replicate a byte scalar held in a general register across a vector.
// Below AVX2 an SSE unpack/shuffle sequence is used; from AVX2 on a
// broadcast instruction, EVEX-encoded when the required features exist.
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    if (UseAVX < 2) {
      // SSE: move the scalar in, then widen it step by step
      // (byte unpack, low-word shuffle, optional quadword unpack).
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (num_elems >= 16) {
        assert(num_elems == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    } else {
      const int enc = vector_length_encoding(this);
      if (num_elems == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, enc);
      } else {
        // Broadcast from the vector register after moving the scalar into it.
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18570
// Replicate a byte loaded from memory across a vector (UseAVX >= 2 only),
// broadcasting straight from the memory operand.
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18581
18582 // ====================ReplicateS=======================================
18583
// Replicate a short scalar held in a general register across a vector.
// Below AVX2 an SSE shuffle sequence is used; from AVX2 on a broadcast
// instruction, EVEX-encoded when the required features exist.
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    if (UseAVX < 2) {
      // SSE: move the scalar in, shuffle the low word across the low
      // quadword, then optionally duplicate the quadword.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (num_elems >= 8) {
        assert(num_elems == 8, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    } else if (num_elems == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
      assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
      __ evpbroadcastw($dst$$XMMRegister, $src$$Register, enc);
    } else {
      // Broadcast from the vector register after moving the scalar into it.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18611
// Replicate an immH constant across a vector: the constant is first moved
// into the integer temp $rtmp and then broadcast as 16-bit words.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    assert(VM_Version::supports_avx512_fp16() && elem_bt == T_SHORT, "");
    __ movl($rtmp$$Register, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18625
// Replicate a value held in a float register across a T_SHORT vector
// (requires AVX512-FP16): the value is moved to the integer temp $rtmp with
// evmovw and then broadcast as 16-bit words.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    Register scratch = $rtmp$$Register;
    __ evmovw(scratch, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18638
// Replicate a short loaded from memory across a vector (UseAVX >= 2 only),
// broadcasting straight from the memory operand.
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18649
18650 // ====================ReplicateI=======================================
18651
// Replicate an int scalar held in a general register across a vector.
// Uses the EVEX broadcast from a general register when possible; otherwise
// the scalar is moved into $dst and broadcast (AVX2) or shuffled (SSE).
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    if (num_elems == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, enc);
    } else {
      // Both remaining paths start by moving the scalar into $dst.
      __ movdl($dst$$XMMRegister, $src$$Register);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, enc);
      } else {
        __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18671
// Replicate an int loaded from memory across a vector. Picks, in order of
// preference: AVX2 integer broadcast, AVX vbroadcastss, or an SSE
// load-and-shuffle sequence.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const bool has_avx2 = VM_Version::supports_avx2();
    if (has_avx2) {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, enc);
    } else {
      if (VM_Version::supports_avx()) {
        __ vbroadcastss($dst$$XMMRegister, $mem$$Address, enc);
      } else {
        // SSE: load the scalar, then shuffle dword 0 into every position.
        __ movdl($dst$$XMMRegister, $mem$$Address);
        __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18689
// Replicate an int immediate across a non-long integral vector by loading a
// pre-replicated constant. The constant holds 4, 8 or 16 bytes' worth of
// elements depending on AVX / SSE3 support (byte count divided by element size).
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    // NOTE(review): the $constantaddress argument is kept self-contained (no
    // locals from this body); presumably ADLC evaluates it elsewhere -- confirm.
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                   type2aelembytes(Matcher::vector_element_basic_type(this))));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_constant_vector(elem_bt, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18704
// Replicate integer zero across a non-long integral vector by xor-ing the
// destination with itself.
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // Plain pxor unless EVEX is supported without AVX512VL, in which case
    // vpxor with the explicit length encoding is used.
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18720
// Replicate integer -1 across a non-long integral vector via the
// vallones helper.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18731
18732 // ====================ReplicateL=======================================
18733
// Replicate a long (8 byte) scalar held in a general register across a vector.
// Uses the EVEX broadcast from a general register when possible; otherwise
// the scalar is moved into $dst and broadcast (AVX2) or unpacked (SSE).
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    if (num_elems == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, enc);
    } else {
      // Both remaining paths start by moving the scalar into $dst.
      __ movdq($dst$$XMMRegister, $src$$Register);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, enc);
      } else {
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18754
// Replicate a long loaded from memory across a vector. Picks, in order of
// preference: AVX2 quadword broadcast, SSE3 movddup, or a load followed by
// a quadword unpack.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const bool has_avx2 = VM_Version::supports_avx2();
    if (has_avx2) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, enc);
    } else {
      if (VM_Version::supports_sse3()) {
        __ movddup($dst$$XMMRegister, $mem$$Address);
      } else {
        // Load the quadword, then duplicate it into the upper half.
        __ movq($dst$$XMMRegister, $mem$$Address);
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18772
// Replicate a long (8 byte) immediate across a vector by loading it from the
// constant table. The table entry holds 1 element with SSE3, otherwise 2.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    // NOTE(review): the $constantaddress argument is kept self-contained (no
    // locals from this body); presumably ADLC evaluates it elsewhere -- confirm.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18785
// Replicate long zero across a vector by xor-ing the destination with itself.
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // Plain pxor unless EVEX is supported without AVX512VL, in which case
    // vpxor with the explicit length encoding is used.
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18800
// Replicate long -1 across a vector via the vallones helper.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18811
18812 // ====================ReplicateF=======================================
18813
// Replicate a float scalar held in a vector register across a vector
// (UseAVX > 0). Vectors of up to 4 elements use a 128-bit vpermilps; longer
// ones use vbroadcastss with AVX2, or vpermilps plus vinsertf128_high without.
instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    const bool wider_than_128 = num_elems > 4;
    if (wider_than_128 && VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, enc); // reg-to-reg variant requires AVX2
    } else {
      // Without AVX2 the only wider case handled here is 8 elements.
      assert(!wider_than_128 || num_elems == 8, "sanity");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      if (wider_than_128) {
        __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18833
// Replicate a float scalar held in a vector register across a vector when
// UseAVX == 0, using a single pshufd with selector 0x00 (dword 0 of $src
// selected for every destination dword).
instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
18843
// Replicate a float loaded from memory across a vector (UseAVX > 0),
// broadcasting straight from the memory operand.
instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18854
// Replicate a float immediate across a vector by loading it from the constant
// table. The table entry holds 1 element with AVX, 2 with SSE3 only, else 4.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    // NOTE(review): the $constantaddress argument is kept self-contained (no
    // locals from this body); presumably ADLC evaluates it elsewhere -- confirm.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18868
// Replicate float zero across a vector by xor-ing the destination with itself.
instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // Plain xorps unless EVEX is supported without AVX512VL+DQ, in which
    // case vpxor with the explicit length encoding is used.
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vldq()) {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18883
18884 // ====================ReplicateD=======================================
18885
// Replicate a double (8 bytes) scalar held in a vector register across a
// vector (UseSSE >= 3). Vectors of up to 2 elements use movddup; longer ones
// use vbroadcastsd with AVX2, or movddup plus vinsertf128_high without.
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    const bool wider_than_128 = num_elems > 2;
    if (wider_than_128 && VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, enc); // reg-to-reg variant requires AVX2
    } else {
      // Without AVX2 the only wider case handled here is 4 elements.
      assert(!wider_than_128 || num_elems == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      if (wider_than_128) {
        __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18906
// Replicate a double scalar held in a vector register across a vector when
// UseSSE < 3, using a single pshufd with selector 0x44 (dwords 0,1,0,1 of
// $src, i.e. the low quadword placed in both halves).
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
18916
// Replicate a double loaded from memory across a vector (UseSSE >= 3).
// Vectors with fewer than 4 elements use movddup; longer ones vbroadcastsd.
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) < 4) {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      // The length encoding is only needed for the broadcast form.
      const int enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18931
// Replicate a double (8 byte) immediate across a vector by loading it from the
// constant table. The table entry holds 1 element with SSE3, otherwise 2.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    // NOTE(review): the $constantaddress argument is kept self-contained (no
    // locals from this body); presumably ADLC evaluates it elsewhere -- confirm.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18944
// Replicate double zero across a vector by xor-ing the destination with itself.
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // Plain xorps unless EVEX is supported without AVX512VL+DQ, in which
    // case vpxor with the explicit length encoding is used.
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vldq()) {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18959
18960 // ====================VECTOR INSERT=======================================
18961
// VectorInsert of an integral scalar into a vector shorter than 32 bytes.
// $dst is both the input vector and the result (see match rule); $idx is the
// constant element position.
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "");
    // The position must address an existing element.
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18979
// VectorInsert of an integral scalar into a 32-byte vector. The 128-bit lane
// holding the target element is extracted into $vtmp, the scalar is inserted
// there, and the lane is written back into a copy of $src placed in $dst.
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    // (The previously declared but unused vlen_enc local has been removed.)
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Number of elements in one 16-byte lane, and its base-2 logarithm.
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: element position within its lane; y_idx: which of the two lanes.
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19002
// Insert the general-purpose register value val into element idx of a
// 64-byte integral vector. The 16-byte lane that holds the element is
// extracted from src into vtmp, patched there, and then combined with src
// to form dst.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Number of elements in one 16-byte lane and its base-2 logarithm.
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "integral element type required");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: element position inside the lane; y_idx: which of the four lanes.
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19026
// Insert the long register value val into element idx of a two-element
// long vector. dst is both the input vector and the result.
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "long element type required");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19040
// Insert the long register value val into element idx of a four-element
// long vector. The 128-bit half that holds the element is extracted from
// src into vtmp, patched there, and then combined with src to form dst.
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "long element type required");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two longs per 128-bit half: bit 0 selects the element, bit 1 the half.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19059
// Insert the long register value val into element idx of an eight-element
// long vector. The 128-bit quarter that holds the element is extracted from
// src into vtmp, patched there, and then combined with src to form dst.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Bit 0 of idx picks the long inside a 128-bit quarter, bits 1..2 pick the quarter.
    uint pos_in_quarter = $idx$$constant & right_n_bits(1);
    uint quarter = ($idx$$constant >> 1) & 3;

    XMMRegister scratch = $vtmp$$XMMRegister;
    __ vextracti32x4(scratch, $src$$XMMRegister, quarter);
    __ vpinsrq(scratch, scratch, $val$$Register, pos_in_quarter);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, scratch, quarter);
  %}
  ins_pipe( pipe_slow );
%}
19077
// Insert the float register value val into element idx of a float vector
// with fewer than eight elements. dst is both the input vector and the result.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Only the low two bits of idx are used; they are passed to insertps
    // shifted left by four.
    uint slot = $idx$$constant & right_n_bits(2);
    uint insertps_imm = slot << 4;
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, insertps_imm);
  %}
  ins_pipe( pipe_slow );
%}
19093
// Insert the float register value val into element idx of a float vector
// with eight or sixteen elements. The 128-bit lane that holds the element
// is extracted from src into vtmp, patched there, and then combined with
// src to form dst.
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    // Four floats per 128-bit lane: the low two bits of idx select the float.
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      // Two lanes: one more bit of idx selects the lane.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      // Four lanes: two more bits of idx select the lane.
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
19121
// Insert the double register value val into element idx of a two-element
// double vector. The value is first moved into the long register tmp and
// then inserted from there; dst is both the input vector and the result.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    Register raw_bits = $tmp$$Register;
    __ movq(raw_bits, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, raw_bits, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19137
// Insert the double register value val into element idx of a four-element
// double vector. The value is moved into the long register tmp; the 128-bit
// half that holds the element is extracted from src into vtmp, patched
// there, and then combined with src to form dst.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two doubles per 128-bit half: bit 0 selects the element, bit 1 the half.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19157
// Insert the double register value val into element idx of an eight-element
// double vector. The value is moved into the long register tmp; the 128-bit
// quarter that holds the element is extracted from src into vtmp, patched
// there, and then combined with src to form dst.
// NOTE(review): idx is declared immI here while the other insert rules in
// this section use immU8 - confirm whether the difference is intentional.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  // Both temporaries are listed, matching the format used by insert4D.
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Bit 0 of idx selects the double inside a 128-bit quarter, bits 1..2 the quarter.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19176
19177 // ====================REDUCTION ARITHMETIC=======================================
19178
19179 // =======================Int Reduction==========================================
19180
// Integer reductions over a vector whose elements are T_INT. The scalar
// input src1 and the vector input src2 are handed to reduceI() together
// with the ideal opcode and the element count of src2.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Element type is taken from the vector input, in(2), i.e. src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceI(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19201
19202 // =======================Long Reduction==========================================
19203
// Long reductions over a vector whose elements are T_LONG, selected when
// AVX512DQ is not supported. Vector operands use the legVec operand class.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Element type is taken from the vector input, in(2), i.e. src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceL(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19224
// Long reductions over a vector whose elements are T_LONG, selected when
// AVX512DQ is supported. Vector operands use the vec operand class.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Element type is taken from the vector input, in(2), i.e. src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceL(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19245
19246 // =======================Float Reduction==========================================
19247
// Strictly ordered float add/mul reduction for vectors of at most four
// elements. dst is both the scalar input and the result.
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  // Length is taken from the vector input, in(2), i.e. src.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4);
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19261
// Strictly ordered float add/mul reduction for eight-element vectors.
// dst is both the scalar input and the result; two vector temporaries are used.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  // Length is taken from the vector input, in(2), i.e. src.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8);
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19275
// Strictly ordered float add/mul reduction for sixteen-element vectors.
// dst is both the scalar input and the result; vector operands use legVec.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  // Length is taken from the vector input, in(2), i.e. src.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16);
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19289
19290
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Float add/mul reduction without the strict-order requirement, for
  // two-element vectors. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the encoder.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19307
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Float add/mul reduction without the strict-order requirement, for
  // four-element vectors. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the encoder.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19324
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Float add/mul reduction without the strict-order requirement, for
  // eight-element vectors. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the encoder.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19341
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Float add/mul reduction without the strict-order requirement, for
  // sixteen-element vectors. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the encoder.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19358
19359 // =======================Double Reduction==========================================
19360
// Strictly ordered double add/mul reduction for two-element vectors.
// dst is both the scalar input and the result.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  // Length is taken from the vector input, in(2), i.e. src.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19374
// Strictly ordered double add/mul reduction for four-element vectors.
// dst is both the scalar input and the result; two vector temporaries are used.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  // Length is taken from the vector input, in(2), i.e. src.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4);
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19388
// Strictly ordered double add/mul reduction for eight-element vectors.
// dst is both the scalar input and the result; vector operands use legVec.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  // Length is taken from the vector input, in(2), i.e. src.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8);
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19402
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Double add/mul reduction without the strict-order requirement, for
  // two-element vectors. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the encoder.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19419
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Double add/mul reduction without the strict-order requirement, for
  // four-element vectors. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the encoder.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19436
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Double add/mul reduction without the strict-order requirement, for
  // eight-element vectors. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the encoder.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19453
19454 // =======================Byte Reduction==========================================
19455
// Byte reductions (everything listed below; MulReductionVI is not matched
// here) over a vector whose elements are T_BYTE, selected when AVX512BW is
// not supported. Vector operands use the legVec operand class.
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Element type is taken from the vector input, in(2), i.e. src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19475
// Byte reductions (everything listed below; MulReductionVI is not matched
// here) over a vector whose elements are T_BYTE, selected when AVX512BW is
// supported. Vector operands use the vec operand class.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Element type is taken from the vector input, in(2), i.e. src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19495
19496 // =======================Short Reduction==========================================
19497
// Short reductions over a vector whose elements are T_SHORT. The scalar
// input src1 and the vector input src2 are handed to reduceS() together
// with the ideal opcode and the element count of src2.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Element type is taken from the vector input, in(2), i.e. src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceS(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19518
19519 // =======================Mul Reduction==========================================
19520
// Multiply reduction over a T_BYTE vector of at most 32 elements.
// dst is additionally declared TEMP; vector operands use the vec operand class.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Both checks look at the vector input, in(2), i.e. src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19534
// Multiply reduction over a T_BYTE vector of exactly 64 elements.
// dst is additionally declared TEMP; vector operands use the legVec operand class.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Both checks look at the vector input, in(2), i.e. src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19548
19549 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  // Float min/max reduction of a two-element vector when AVX10.2 is not
  // supported and the scalar input is the immediate +infinity (for Min) or
  // -infinity (for Max). src1 is not passed to the encoder.
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    // Third argument is false in this immediate-input form.
    __ reduceFloatMinMax(ideal_op, num_elems, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19571
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  // Float min/max reduction of a vector with four or more elements when
  // AVX10.2 is not supported and the scalar input is the immediate
  // +infinity (for Min) or -infinity (for Max). src1 is not passed to the encoder.
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    // Third argument is false in this immediate-input form.
    __ reduceFloatMinMax(ideal_op, num_elems, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19592
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  // Float min/max reduction of a two-element vector when AVX10.2 is not
  // supported. Here dst is both the scalar input and the result (the match
  // rules use dst as the first reduction input).
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    // Third argument is true in this form, where dst carries the scalar input.
    __ reduceFloatMinMax(ideal_op, num_elems, true, $dst$$XMMRegister, $src$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19611
19612
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  // Float min/max reduction of a vector with four or more elements when
  // AVX10.2 is not supported. Here dst is both the scalar input and the
  // result (the match rules use dst as the first reduction input).
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    // Third argument is true in this form, where dst carries the scalar input.
    __ reduceFloatMinMax(ideal_op, num_elems, true, $dst$$XMMRegister, $src$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19631
// Float min/max reduction of a 2-lane vector on AVX10.2 hardware, for the case
// where the scalar input is the identity constant (+Inf for MinReductionV,
// -Inf for MaxReductionV). $src1 is therefore never read by the encoding.
// The format string carries the "2F" tag so the printed name matches the
// other 2-lane float/double reduction rules.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    // Only one real temporary is supplied; the three preceding temp slots of
    // the helper are passed as xnoreg on this path.
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19649
// Float min/max reduction of a vector with 4 or more lanes on AVX10.2
// hardware, for the case where the scalar input is the identity constant
// (+Inf for MinReductionV, -Inf for MaxReductionV). $src1 is never read by
// the encoding. The format string carries the "F" tag so the printed name
// matches the other float/double reduction rules.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    // Two real temporaries are supplied; the three preceding temp slots of
    // the helper are passed as xnoreg on this path.
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19667
// AVX10.2 flavour of the 2-lane float min/max reduction in which $dst is both
// an input and the result. Needs a single vector temporary.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister acc      = $dst$$XMMRegister;
    XMMRegister vec_in   = $src$$XMMRegister;
    // The first three temp slots of the helper are passed as xnoreg here.
    __ reduceFloatMinMax(ideal_op, lane_count, true, acc, vec_in, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19683
// AVX10.2 flavour of the float min/max reduction over 4 or more lanes in which
// $dst is both an input and the result. Needs two vector temporaries.
// The format string uses the "F" tag (not "2F"): this rule covers the >= 4
// lane case, matching the naming of minmax_reductionF_av.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    // The first three temp slots of the helper are passed as xnoreg here.
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19699
//--------------------Min/Max Double Reduction --------------------
// Double min/max reduction of a 2-lane vector when the scalar input is the
// identity constant (+Inf for MinReductionV, -Inf for MaxReductionV).
// Used only when AVX10.2 is unavailable; the flags register is killed.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src2);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister vec_in   = $src2$$XMMRegister;
    // $src1 is not read: the predicate pins it to the reduction identity.
    __ reduceDoubleMinMax(ideal_op, lane_count, false, result, vec_in,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19721
// Double min/max reduction of a vector with 4 or more lanes when the scalar
// input is the identity constant (+Inf for MinReductionV, -Inf for
// MaxReductionV). Used only when AVX10.2 is unavailable; kills the flags.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src2);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister vec_in   = $src2$$XMMRegister;
    // $src1 is not read: the predicate pins it to the reduction identity.
    __ reduceDoubleMinMax(ideal_op, lane_count, false, result, vec_in,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19742
19743
// Double min/max reduction over a 2-lane vector in which $dst is both an input
// and the result. Used only when AVX10.2 is unavailable; kills the flags.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister acc      = $dst$$XMMRegister;
    XMMRegister vec_in   = $src$$XMMRegister;
    __ reduceDoubleMinMax(ideal_op, lane_count, true, acc, vec_in,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19762
// Double min/max reduction over 4 or more lanes in which $dst is both an input
// and the result. Used only when AVX10.2 is unavailable; kills the flags.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister acc      = $dst$$XMMRegister;
    XMMRegister vec_in   = $src$$XMMRegister;
    __ reduceDoubleMinMax(ideal_op, lane_count, true, acc, vec_in,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19781
// AVX10.2 double min/max reduction of a 2-lane vector when the scalar input
// is the identity constant (+Inf for MinReductionV, -Inf for MaxReductionV).
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src2);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister vec_in   = $src2$$XMMRegister;
    // $src1 is not read; the first three temp slots are passed as xnoreg.
    __ reduceDoubleMinMax(ideal_op, lane_count, false, result, vec_in, xnoreg,
                          xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19799
// AVX10.2 double min/max reduction of a vector with 4 or more lanes when the
// scalar input is the identity constant (+Inf for Min, -Inf for Max).
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src2);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister vec_in   = $src2$$XMMRegister;
    // $src1 is not read; the first three temp slots are passed as xnoreg.
    __ reduceDoubleMinMax(ideal_op, lane_count, false, result, vec_in, xnoreg, xnoreg,
                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19817
19818
// AVX10.2 flavour of the 2-lane double min/max reduction in which $dst is both
// an input and the result. Needs a single vector temporary.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister acc      = $dst$$XMMRegister;
    XMMRegister vec_in   = $src$$XMMRegister;
    // The first three temp slots of the helper are passed as xnoreg here.
    __ reduceDoubleMinMax(ideal_op, lane_count, true, acc, vec_in,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19834
// AVX10.2 flavour of the double min/max reduction over 4 or more lanes in
// which $dst is both an input and the result. Needs two vector temporaries.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister acc      = $dst$$XMMRegister;
    XMMRegister vec_in   = $src$$XMMRegister;
    // The first three temp slots of the helper are passed as xnoreg here.
    __ reduceDoubleMinMax(ideal_op, lane_count, true, acc, vec_in,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19850
19851 // ====================VECTOR ARITHMETIC=======================================
19852
19853 // --------------------------------- ADD --------------------------------------
19854
// Byte vector add, two-operand form used when UseAVX == 0:
// $dst is both the first input and the result.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ paddb(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19865
// Byte vector add, three-operand register form used when UseAVX > 0.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpaddb(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19876
// Byte vector add with the second input loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpaddb(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19888
// Short/char vector add, two-operand form used when UseAVX == 0:
// $dst is both the first input and the result.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ paddw(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19899
// Short/char vector add, three-operand register form used when UseAVX > 0.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpaddw(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19910
// Short/char vector add with the second input loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpaddw(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19922
// Int vector add, two-operand form used when UseAVX == 0:
// $dst is both the first input and the result.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ paddd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19933
// Int vector add, three-operand register form used when UseAVX > 0.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpaddd(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19944
19945
// Int vector add with the second input loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpaddd(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19957
// Long vector add, two-operand form used when UseAVX == 0:
// $dst is both the first input and the result.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ paddq(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19968
// Long vector add, three-operand register form used when UseAVX > 0.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpaddq(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19979
// Long vector add with the second input loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpaddq(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19991
// Float vector add, two-operand form used when UseAVX == 0:
// $dst is both the first input and the result.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addps(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20002
// Float vector add, three-operand register form used when UseAVX > 0.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vaddps(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20013
// Float vector add with the second input loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vaddps(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20025
// Double vector add, two-operand form used when UseAVX == 0:
// $dst is both the first input and the result.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addpd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20036
// Double vector add, three-operand register form used when UseAVX > 0.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vaddpd(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20047
// Double vector add with the second input loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vaddpd(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20059
20060 // --------------------------------- SUB --------------------------------------
20061
// Byte vector subtract, two-operand form used when UseAVX == 0:
// $dst is both the minuend and the result.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ psubb(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20072
// Byte vector subtract, three-operand register form used when UseAVX > 0.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpsubb(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20083
// Byte vector subtract with the subtrahend loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpsubb(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20095
// Short/char vector subtract, two-operand form used when UseAVX == 0:
// $dst is both the minuend and the result.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ psubw(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20106
20107
// Short/char vector subtract, three-operand register form used when UseAVX > 0.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpsubw(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20118
// Short/char vector subtract with the subtrahend loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpsubw(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20130
// Int vector subtract, two-operand form used when UseAVX == 0:
// $dst is both the minuend and the result.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ psubd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20141
// Int vector subtract, three-operand register form used when UseAVX > 0.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpsubd(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20152
// Int vector subtract with the subtrahend loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpsubd(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20164
// Long vector subtract, two-operand form used when UseAVX == 0:
// $dst is both the minuend and the result.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ psubq(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20175
// Long vector subtract, three-operand register form used when UseAVX > 0.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpsubq(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20186
20187
// Long vector subtract with the subtrahend loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpsubq(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20199
// Float vector subtract, two-operand form used when UseAVX == 0:
// $dst is both the minuend and the result.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subps(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20210
// Float vector subtract, three-operand register form used when UseAVX > 0.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vsubps(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20221
// Float vector subtract with the subtrahend loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vsubps(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20233
// Double vector subtract, two-operand form used when UseAVX == 0:
// $dst is both the minuend and the result.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subpd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20244
// Double vector subtract, three-operand register form used when UseAVX > 0.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vsubpd(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20255
// Double vector subtract with the subtrahend loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vsubpd(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20267
20268 // --------------------------------- MUL --------------------------------------
20269
// Byte vector multiply for vectors of at most 8 bytes. There is no packed
// byte multiply here, so the work is done in 16-bit lanes.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister product = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    // Widen both byte inputs to words, then multiply word-wise.
    __ pmovsxbw(product, $src1$$XMMRegister);
    __ pmovsxbw(scratch, $src2$$XMMRegister);
    __ pmullw(product, scratch);
    // Shift left then right by 8 to clear the upper byte of every word, so
    // only the low byte of each product is packed back down to bytes.
    __ psllw(product, 8);
    __ psrlw(product, 8);
    __ packuswb(product, product);
  %}
  ins_pipe( pipe_slow );
%}
20287
// Byte vector multiply for vectors wider than 8 bytes when UseAVX == 0.
// Odd- and even-indexed bytes are multiplied separately in 16-bit lanes and
// the two partial results are OR-ed together.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister product = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    XMMRegister rhs     = $src2$$XMMRegister;

    // Odd-index bytes: bring them down to the low byte of each word,
    // multiply, and move the low byte of the product back up.
    __ movdqu(product, lhs);
    __ psrlw(product, 8);
    __ movdqu(scratch, rhs);
    __ psrlw(scratch, 8);
    __ pmullw(product, scratch);
    __ psllw(product, 8);

    // Even-index bytes: multiply the words as they are and keep only the
    // low byte of every product.
    __ movdqu(scratch, lhs);
    __ pmullw(scratch, rhs);
    __ psllw(scratch, 8);
    __ psrlw(scratch, 8);

    // Merge the odd and even halves.
    __ por(product, scratch);
  %}
  ins_pipe( pipe_slow );
%}
20312
// Byte vector multiply for vectors wider than 8 bytes when UseAVX > 0.
// Same odd/even split as the SSE rule, but in three-operand form: $dst is
// written only by the final OR, so it is not declared as a TEMP.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister lhs  = $src1$$XMMRegister;
    XMMRegister rhs  = $src2$$XMMRegister;
    XMMRegister even = $xtmp1$$XMMRegister;  // also scratch for the odd half
    XMMRegister odd  = $xtmp2$$XMMRegister;

    // Odd-index bytes: shift them into the low byte of each word, multiply,
    // then move the low byte of the product back into the high byte.
    __ vpsrlw(odd, lhs, 8, vector_len);
    __ vpsrlw(even, rhs, 8, vector_len);
    __ vpmullw(odd, even, odd, vector_len);
    __ vpsllw(odd, odd, 8, vector_len);

    // Even-index bytes: multiply whole words, keep the low byte only.
    __ vpmullw(even, lhs, rhs, vector_len);
    __ vpsllw(even, even, 8, vector_len);
    __ vpsrlw(even, even, 8, vector_len);

    // Merge the two halves into the destination.
    __ vpor($dst$$XMMRegister, even, odd, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20334
// Short/char vector multiply, two-operand form used when UseAVX == 0:
// $dst is both the first input and the result.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ pmullw(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20345
// Short/char vector multiply, three-operand register form used when UseAVX > 0.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpmullw(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20356
// Short/char vector multiply with the second input loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpmullw(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20368
// Int vector multiply, two-operand form used when UseAVX == 0:
// $dst is both the first input and the result. Asserts UseSSE > 3.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ pmulld(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20380
// Int vector multiply, three-operand register form used when UseAVX > 0.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpmulld(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20391
// Int vector multiply with the second input loaded straight from memory.
// Requires UseAVX > 0 and a first input wider than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpmulld(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20403
// Long vector multiply using evpmullq. Chosen for 64-byte vectors when
// AVX512DQ is supported, or for any length when AVX512VL+DQ is supported.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 && VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evpmullq(out, lhs, rhs, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20419
// Long vector multiply using evpmullq with the second input loaded from
// memory. Chosen for 64-byte vectors when AVX512DQ is supported, or for
// vectors wider than 8 bytes when AVX512VL+DQ is supported.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 && VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  ins_cost(500);
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vector_len = vector_length_encoding(this);
    XMMRegister out = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ evpmullq(out, lhs, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
20435
// Packed long multiply for the non-AVX configuration (UseAVX == 0), built
// from 32-bit multiplies: the cross (lo*hi) products are summed and shifted
// into the upper half, then the unsigned lo*lo product is added.
// $dst is written before $src1/$src2 are read for the last time, and is
// declared TEMP alongside $xtmp (presumably so it cannot share a register
// with an input).
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only their lower 32 bits are of interest
    // (shuffle 0xB1 presumably swaps the two dwords of each qword -- confirm).
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    // Move the summed cross products into the upper 32 bits of each lane.
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20457
// Packed long multiply for AVX targets that lack the direct 64-bit multiply:
// 64-byte vectors without supports_avx512dq(), or shorter vectors without
// supports_avx512vldq(). The product is assembled from 32-bit multiplies in
// $xtmp1/$xtmp2; $dst is written only by the final add.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only their lower 32 bits are of interest
    // (shuffle 0xB1 presumably swaps the two dwords of each qword -- confirm).
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    // Move the summed cross products into the upper 32 bits of each lane.
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20482
// Packed long multiply specialised for MulVL nodes that report
// has_uint_inputs(): a single vpmuludq suffices. Its cost (100) is lower than
// the 500 assigned to the other MulVL rules in this section.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVL src1 src2));
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_cost(100);
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20494
// Packed long multiply specialised for MulVL nodes that report
// has_int_inputs(): a single vpmuldq suffices. Its cost (100) is lower than
// the 500 assigned to the other MulVL rules in this section.
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVL src1 src2));
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_cost(100);
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20506
20507 // Floats vector mul
// Packed float multiply for the non-AVX configuration (UseAVX == 0).
// Destructive two-operand form: $dst *= $src.
instruct vmulF(vec dst, vec src) %{
  match(Set dst (MulVF dst src));
  predicate(UseAVX == 0);
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20517
// Packed float multiply, AVX three-operand form: $dst = $src1 * $src2.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVF src1 src2));
  predicate(UseAVX > 0);
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20528
// Packed float multiply with the second factor loaded straight from memory.
// Besides AVX, the predicate requires the first input's vector to be wider
// than 8 bytes.
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  match(Set dst (MulVF src (LoadVector mem)));
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20540
20541 // Doubles vector mul
// Packed double multiply for the non-AVX configuration (UseAVX == 0).
// Destructive two-operand form: $dst *= $src.
instruct vmulD(vec dst, vec src) %{
  match(Set dst (MulVD dst src));
  predicate(UseAVX == 0);
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20551
// Packed double multiply, AVX three-operand form: $dst = $src1 * $src2.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVD src1 src2));
  predicate(UseAVX > 0);
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20562
// Packed double multiply with the second factor loaded straight from memory.
// Besides AVX, the predicate requires the first input's vector to be wider
// than 8 bytes.
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  match(Set dst (MulVD src (LoadVector mem)));
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20574
20575 // --------------------------------- DIV --------------------------------------
20576
20577 // Floats vector div
// Packed float divide for the non-AVX configuration (UseAVX == 0).
// Destructive two-operand form: $dst /= $src.
instruct vdivF(vec dst, vec src) %{
  match(Set dst (DivVF dst src));
  predicate(UseAVX == 0);
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20587
// Packed float divide, AVX three-operand form: $dst = $src1 / $src2.
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (DivVF src1 src2));
  predicate(UseAVX > 0);
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20598
// Packed float divide with the divisor loaded straight from memory.
// Besides AVX, the predicate requires the first input's vector to be wider
// than 8 bytes.
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  match(Set dst (DivVF src (LoadVector mem)));
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20610
20611 // Doubles vector div
// Packed double divide for the non-AVX configuration (UseAVX == 0).
// Destructive two-operand form: $dst /= $src.
instruct vdivD(vec dst, vec src) %{
  match(Set dst (DivVD dst src));
  predicate(UseAVX == 0);
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20621
// Packed double divide, AVX three-operand form: $dst = $src1 / $src2.
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (DivVD src1 src2));
  predicate(UseAVX > 0);
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20632
// Packed double divide with the divisor loaded straight from memory.
// Besides AVX, the predicate requires the first input's vector to be wider
// than 8 bytes.
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  match(Set dst (DivVD src (LoadVector mem)));
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20644
20645 // ------------------------------ MinMax ---------------------------------------
20646
20647 // Byte, Short, Int vector Min/Max
// Signed min/max on byte, short and int vectors for the non-AVX configuration.
// One rule serves both MinV and MaxV; the ideal opcode is forwarded to
// pminmax(), which operates destructively on $dst.
instruct minmax_reg_sse(vec dst, vec src) %{
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  format %{ "vector_minmax $dst,$src\t! " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int ideal_op = this->ideal_Opcode();
    __ pminmax(ideal_op, lane_bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20663
// Signed min/max on byte, short and int vectors when AVX is available.
// One rule serves both MinV and MaxV; the ideal opcode is forwarded to
// vpminmax() in three-operand form.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  format %{ "vector_minmax $dst,$src1,$src2\t! " %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);

    __ vpminmax(ideal_op, lane_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20679
20680 // Long vector Min/Max
// Signed min/max on 16-byte long vectors for the non-AVX configuration.
// Needs a temporary of class rxmm0. Note that the MaxV rule lists its inputs
// as (src dst) while the MinV rule uses (dst src).
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  predicate(Matcher::vector_length_in_bytes(n) == 16 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int ideal_op = this->ideal_Opcode();
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(lane_bt == T_LONG, "sanity");

    __ pminmax(ideal_op, lane_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20699
// Signed min/max on long vectors of up to 32 bytes, for AVX targets without
// supports_avx512vl(). Operands are restricted to the legVec class and $dst
// is additionally declared TEMP.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  predicate(Matcher::vector_length_in_bytes(n) <= 32 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  effect(TEMP dst);
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(lane_bt == T_LONG, "sanity");

    __ vpminmax(ideal_op, lane_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20717
// Signed min/max on long vectors using the EVEX encoding: selected for
// 64-byte vectors, or for any length when supports_avx512vl() holds.
// One rule serves both MinV and MaxV; the ideal opcode is forwarded to
// vpminmax().
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  // The second source needs its '$' so the operand is substituted in the
  // printed form instead of appearing as the literal text "src2".
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20736
20737 // Float/Double vector Min/Max
// Float/double vector min/max for targets where supports_avx10_2() holds.
// No temporaries are needed; k0 is passed as the mask register argument.
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  format %{ "vector_minmaxFP $dst, $a, $b" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vminmax_fp_avx10_2(ideal_op, lane_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20752
20753 // Float/Double vector Min/Max
// Float/double vector min/max for AVX targets without supports_avx10_2(),
// limited to vectors of at most 32 bytes. All operands use the legVec class
// and three vector temporaries are handed to vminmax_fp().
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);

    __ vminmax_fp(ideal_op, lane_bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20775
// Float/double vector min/max for 64-byte vectors on targets without
// supports_avx10_2(). Uses two vector temporaries and one mask-register
// temporary; $dst is declared TEMP as well.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  // List every temporary the encoding uses, including the mask register.
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20796
20797 // ------------------------------ Unsigned vector Min/Max ----------------------
20798
// Unsigned vector min/max, register-register form. Applies to every element
// type when supports_avx512vl() holds, and to all element types except T_LONG
// otherwise.
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(lane_bt), "");
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vpuminmax(ideal_op, lane_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20813
// Unsigned vector min/max with the second operand loaded from memory.
// Same applicability as the register form: any element type with
// supports_avx512vl(), otherwise every type except T_LONG.
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(lane_bt), "");
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vpuminmax(ideal_op, lane_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20828
// Unsigned min/max on long vectors for targets without supports_avx512vl().
// The work is delegated to vpuminmaxq(), which needs two vector temporaries.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  // The temporaries need their '$' so the allocated registers are printed
  // rather than the literal operand names.
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20842
// Masked unsigned vector min/max, register form. $dst is both the first
// input and the result; $mask is a kReg operand.
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean argument is passed as true; presumably it
    // selects merge-masking -- confirm against evmasked_op().
    __ evmasked_op(ideal_op, lane_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20856
// Masked unsigned vector min/max with the second input loaded from memory.
// $dst is both the first input and the result; $mask is a kReg operand.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean argument is passed as true; presumably it
    // selects merge-masking -- confirm against evmasked_op().
    __ evmasked_op(ideal_op, lane_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20870
20871 // --------------------------------- Signum/CopySign ---------------------------
20872
// Scalar float signum, computed in place on $dst. The matched node supplies
// the zero and one operands as inputs; the flags register is killed.
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    // The ideal opcode tells signum_fp() which signum variant is requested.
    const int ideal_op = this->ideal_Opcode();
    __ signum_fp(ideal_op, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20883
// Scalar double signum, computed in place on $dst. The matched node supplies
// the zero and one operands as inputs; the flags register is killed.
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    // The ideal opcode tells signum_fp() which signum variant is requested.
    const int ideal_op = this->ideal_Opcode();
    __ signum_fp(ideal_op, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20894
// Vector signum (float and double) for targets without supports_avx512vl(),
// limited to vectors of at most 32 bytes. Uses one vector temporary; $dst is
// declared TEMP as well.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int ideal_op = this->ideal_Opcode();
    __ vector_signum_avx(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20909
// Vector signum (float and double) for targets with supports_avx512vl(), or
// for 64-byte vectors. Uses one mask-register temporary; $dst is declared
// TEMP as well.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int ideal_op = this->ideal_Opcode();
    __ vector_signum_evex(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20924
20925 // ---------------------------------------
// For copySign use 0xE4 as the truth-table immediate (imm8) for vpternlog
20927 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20928 // C (xmm2) is set to 0x7FFFFFFF
20929 // Wherever xmm2 is 0, we want to pick from B (sign)
20930 // Wherever xmm2 is 1, we want to pick from A (src)
20931 //
20932 // A B C Result
20933 // 0 0 0 0
20934 // 0 0 1 0
20935 // 0 1 0 1
20936 // 0 1 1 0
20937 // 1 0 0 0
20938 // 1 0 1 1
20939 // 1 1 0 1
20940 // 1 1 1 1
20941 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
20943 // ---------------------------------------
20944
// Scalar float copySign, computed in place on $dst.
// The constant 0x7FFFFFFF is built in $tmp2, moved into $tmp1, and then
// combined with $dst and $src by a 128-bit vpternlogd using immediate 0xE4.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // Materialise the selection constant in a GPR, then move it to an XMM register.
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    // Bitwise three-way select driven by the 0xE4 immediate.
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
20956
// Scalar double copySign, computed in place on $dst.
// The matched node carries an extra immD operand ($zero) that the encoding
// does not reference. The constant 0x7FFFFFFFFFFFFFFF is built in $tmp2, moved
// into $tmp1, and combined with $dst and $src by a 128-bit vpternlogq using
// immediate 0xE4.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_cost(100);
  ins_encode %{
    // Materialise the selection constant in a GPR, then move it to an XMM register.
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    // Bitwise three-way select driven by the 0xE4 immediate.
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
20969
20970 //----------------------------- CompressBits/ExpandBits ------------------------
20971
// CompressBits on int values, all operands in registers: emitted as pextl.
// The predicate restricts the rule to nodes whose bottom type is an int.
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  match(Set dst (CompressBits src mask));
  predicate(n->bottom_type()->isa_int());
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
20981
// ExpandBits on int values, all operands in registers: emitted as pdepl.
// The predicate restricts the rule to nodes whose bottom type is an int.
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  match(Set dst (ExpandBits src mask));
  predicate(n->bottom_type()->isa_int());
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
20991
// CompressBits on int values with the mask loaded from memory (LoadI):
// emitted as pextl with a memory operand.
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  match(Set dst (CompressBits src (LoadI mask)));
  predicate(n->bottom_type()->isa_int());
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21001
// ExpandBits on int values with the mask loaded from memory (LoadI):
// emitted as pdepl with a memory operand.
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  match(Set dst (ExpandBits src (LoadI mask)));
  predicate(n->bottom_type()->isa_int());
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21011
21012 // --------------------------------- Sqrt --------------------------------------
21013
// Packed float square root, register source. The rule has no predicate; the
// encoder asserts that AVX is enabled.
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vec_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21024
// Packed float square root with the source loaded from memory. Applies only
// when the loaded vector is wider than 8 bytes; the encoder asserts that AVX
// is enabled.
instruct vsqrtF_mem(vec dst, memory mem) %{
  match(Set dst (SqrtVF (LoadVector mem)));
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vec_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21036
21037 // Floating point vector sqrt
// Packed double square root, register source. The rule has no predicate; the
// encoder asserts that AVX is enabled.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vec_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21048
// Packed double square root with the source loaded from memory. Applies only
// when the loaded vector is wider than 8 bytes; the encoder asserts that AVX
// is enabled.
instruct vsqrtD_mem(vec dst, memory mem) %{
  match(Set dst (SqrtVD (LoadVector mem)));
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vec_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21060
21061 // ------------------------------ Shift ---------------------------------------
21062
21063 // Left and right shift count vectors are the same on x86
21064 // (only lowest bits of xmm reg are used for count).
// Moves a scalar shift count from a general register into a vector register.
// The same rule serves both LShiftCntV and RShiftCntV.
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    // A single 32-bit GPR-to-XMM move carries the count.
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21074
21075 // Byte vector shift
// Byte vector shift by a non-variable count for vectors of at most 8
// elements. The bytes are extended into $tmp (vextendbw), shifted with
// vshiftw, masked with vector_short_to_byte_mask() and packed back into $dst.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    // Every opcode except the unsigned right shift requests sign extension.
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    // $dst first holds the mask, then the masked shift result.
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21095
// Byte vector shift by a non-variable count for 16-element vectors when
// UseAVX <= 1. The source is processed as two halves: one extended from $src
// directly into $tmp1, the other extended from a pshufd(0xE) copy in $tmp2.
// Both are shifted with vshiftw, masked, and packed together into $dst.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    // Every opcode except the unsigned right shift requests sign extension.
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    // NOTE(review): shuffle 0xE presumably brings the upper 8 bytes of $src
    // into the low half of $tmp2 -- confirm.
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    // $dst holds the mask while $tmp2 is masked, then receives masked $tmp1.
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21120
// Byte vector shift by a non-variable count for 16-element vectors when
// UseAVX > 1. All 16 bytes are extended into one 256-bit register, shifted
// and masked there, then the two 128-bit halves are packed into $dst.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    // Every opcode except the unsigned right shift requests sign extension.
    bool sign = (opcode != Op_URShiftVB);
    // The intermediate work is done at 256-bit width regardless of the node's length.
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    // NOTE(review): the literal 0 is presumably the 128-bit length encoding -- confirm.
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21141
// Byte vector shift by a non-variable count for 32-element vectors.
// The upper 128 bits of $src are extended into $tmp and $src itself into
// $dst; both are shifted and masked at 256-bit width, packed together, and
// finally reordered with vpermq(0xD8).
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    // Every opcode except the unsigned right shift requests sign extension.
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    // NOTE(review): 0xD8 presumably restores element order after the
    // per-128-bit-lane pack -- confirm.
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21166
// Byte vector shift by a non-variable count for 64-element vectors.
// The upper 256 bits of $src are extended into $tmp1 and $src itself into
// $tmp2; both are shifted and masked at 512-bit width, packed into $dst, and
// finally permuted using the table at vector_byte_perm_mask().
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    // Every opcode except the unsigned right shift requests sign extension.
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    // Load the short-to-byte mask into $dst and broadcast it across the full width.
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    // $tmp2 is reused for the permutation table once its shift result has been packed.
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21194
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int
// with sign extension before a shift. But char vectors are fine since chars
// are unsigned values.
// Shorts/Chars vector shift (left, arithmetic right and logical right) by a
// uniform, non-variable count.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    XMMRegister dst   = $dst$$XMMRegister;
    XMMRegister src   = $src$$XMMRegister;
    XMMRegister count = $shift$$XMMRegister;
    int shift_op = this->ideal_Opcode();

    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      int enc = vector_length_encoding(this);
      __ vshiftw(shift_op, dst, src, count, enc);
    } else {
      // Two-operand form: copy the source with a move matching the vector
      // size, then shift the copy in place.
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt(dst, src);
      } else if (vlen == 4) {
        __ movdbl(dst, src);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu(dst, src);
      }
      __ vshiftw(shift_op, dst, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21229
// Integers vector shift (left, arithmetic right and logical right) by a
// uniform, non-variable count held in a vector register.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    XMMRegister dst   = $dst$$XMMRegister;
    XMMRegister src   = $src$$XMMRegister;
    XMMRegister count = $shift$$XMMRegister;
    int shift_op = this->ideal_Opcode();

    if (UseAVX > 0) {
      int enc = vector_length_encoding(this);
      __ vshiftd(shift_op, dst, src, count, enc);
    } else {
      // Two-operand form: copy the source first, then shift in place.
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl(dst, src);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu(dst, src);
      }
      __ vshiftd(shift_op, dst, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21257
// Integers vector shift (left, arithmetic right and logical right) by an
// immediate count.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    XMMRegister dst = $dst$$XMMRegister;
    XMMRegister src = $src$$XMMRegister;
    int shift_op = this->ideal_Opcode();

    if (UseAVX > 0) {
      int enc = vector_length_encoding(this);
      __ vshiftd_imm(shift_op, dst, src, $shift$$constant, enc);
    } else {
      // Two-operand form: copy the source first, then shift in place.
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl(dst, src);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu(dst, src);
      }
      __ vshiftd_imm(shift_op, dst, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21283
// Longs vector shift (left and logical right) by a uniform, non-variable
// count held in a vector register. The arithmetic right shift is matched by
// separate rules.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    XMMRegister dst   = $dst$$XMMRegister;
    XMMRegister src   = $src$$XMMRegister;
    XMMRegister count = $shift$$XMMRegister;
    int shift_op = this->ideal_Opcode();

    if (UseAVX > 0) {
      int enc = vector_length_encoding(this);
      __ vshiftq(shift_op, dst, src, count, enc);
    } else {
      // Without AVX only the two-element vector is expected here.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(dst, src);
      __ vshiftq(shift_op, dst, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21304
// Longs vector shift (left and logical right) by an immediate count.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    XMMRegister dst = $dst$$XMMRegister;
    XMMRegister src = $src$$XMMRegister;
    int shift_op = this->ideal_Opcode();

    if (UseAVX > 0) {
      int enc = vector_length_encoding(this);
      __ vshiftq_imm(shift_op, dst, src, $shift$$constant, enc);
    } else {
      // Without AVX only the two-element vector is expected here.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(dst, src);
      __ vshiftq_imm(shift_op, dst, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21323
21324 // -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift by a uniform count for UseAVX <= 2.
// The shift is synthesized: with m = vector_long_sign_mask >>> shift, the
// result is ((src >>> shift) ^ m) - m.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    XMMRegister dst   = $dst$$XMMRegister;
    XMMRegister src   = $src$$XMMRegister;
    XMMRegister count = $shift$$XMMRegister;
    XMMRegister mask  = $tmp$$XMMRegister;

    uint vlen = Matcher::vector_length(this);
    if (vlen != 2) {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int enc = Assembler::AVX_256bit;
      // Logical shift of the data, then of the sign mask, then fix the sign.
      __ vpsrlq(dst, src, count, enc);
      __ vmovdqu(mask, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq(mask, mask, count, enc);
      __ vpxor(dst, dst, mask, enc);
      __ vpsubq(dst, dst, mask, enc);
    } else {
      // Two-operand forms: work on a copy of the source.
      __ movdqu(dst, src);
      __ psrlq(dst, count);
      __ movdqu(mask, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq(mask, count);
      __ pxor(dst, mask);
      __ psubq(dst, mask);
    }
  %}
  ins_pipe( pipe_slow );
%}
21353
// Long vector arithmetic right shift by a uniform count when UseAVX > 2:
// emitted as a single evpsraq.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    XMMRegister dst   = $dst$$XMMRegister;
    XMMRegister src   = $src$$XMMRegister;
    XMMRegister count = $shift$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ evpsraq(dst, src, count, enc);
  %}
  ins_pipe( pipe_slow );
%}
21364
21365 // ------------------- Variable Shift -----------------------------
// Byte variable shift (per-element counts), up to 8 elements, for CPUs
// without AVX512BW. varshiftbw leaves word-sized results in $dst, which are
// then packed back down to bytes.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    // Use the named encoding rather than a bare 0 for the vector length.
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21386
// Byte variable shift (per-element counts), 16 elements, for CPUs without
// AVX512BW. The two 8-byte halves are shifted separately as words and the
// word results are packed together into $dst.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // Every instruction below operates on 128-bit vectors; pass the named
    // encoding instead of a bare 0.
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, vlen_enc);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, vlen_enc);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21414
// Byte variable shift (per-element counts), 32 elements, for CPUs without
// AVX512BW. Each 128-bit lane is processed as two 8-byte halves; the packed
// result of the upper lane is inserted into the upper half of $dst.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // The shuffles, shifts and packs all work on 128-bit vectors; pass the
    // named encoding instead of a bare 0.
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, vlen_enc);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, vlen_enc);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, vlen_enc);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, vlen_enc);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);

    // Merge the two results in dst (0x1 is the lane index, not a vector length)
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21450
// Byte variable shift (per-element counts), up to 32 elements, when
// AVX512BW is available: delegated to evarshiftb with one temporary.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    XMMRegister dst     = $dst$$XMMRegister;
    XMMRegister src     = $src$$XMMRegister;
    XMMRegister counts  = $shift$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    int shift_op = this->ideal_Opcode();
    int enc = vector_length_encoding(this);
    __ evarshiftb(shift_op, dst, src, counts, enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
21469
// Byte variable shift (per-element counts), 64 elements, when AVX512BW is
// available. The vector is handled as two 256-bit halves, each shifted with
// evarshiftb, and the upper result is inserted back into $dst.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    XMMRegister dst    = $dst$$XMMRegister;
    XMMRegister src    = $src$$XMMRegister;
    XMMRegister counts = $shift$$XMMRegister;
    XMMRegister hi_src = $vtmp1$$XMMRegister;
    XMMRegister hi_cnt = $vtmp2$$XMMRegister;
    int shift_op = this->ideal_Opcode();
    int half_enc = Assembler::AVX_256bit;

    // Lower half: result goes straight to dst, hi_src serves as scratch.
    __ evarshiftb(shift_op, dst, src, counts, half_enc, hi_src);

    // Upper half: extract data and counts, shift, hi_cnt doubles as scratch.
    __ vextracti64x4_high(hi_src, src);
    __ vextracti64x4_high(hi_cnt, counts);
    __ evarshiftb(shift_op, hi_src, hi_src, hi_cnt, half_enc, hi_cnt);

    // Put the upper result into the upper half of dst.
    __ vinserti64x4(dst, dst, hi_src, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21492
// Short variable shift (per-element counts), up to 8 elements, for CPUs
// without AVX512BW. Elements and counts are widened to ints, shifted with
// varshiftd, masked back to the short range and packed down to shorts.
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  // The rule covers left, right and unsigned right shifts, so the printed
  // name must not say "left"; it also lists the temporary like the byte rules.
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // False only for URShiftVS; handed to vextendwd as its sign flag.
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Widen data and counts to 256-bit int vectors (named encoding, not a bare 1).
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    // Pack the two 128-bit halves of the int result into one 128-bit short vector.
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21518
// Short variable shift (per-element counts), 16 elements, for CPUs without
// AVX512BW. Each 8-element half is widened to ints, shifted with varshiftd
// and masked; the two int results are then packed and reordered into $dst.
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  // The rule covers left, right and unsigned right shifts, so the printed
  // name must not say "left"; it also lists the temporaries like the byte rules.
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // False only for URShiftVS; handed to vextendwd as its sign flag.
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21554
// Short variable shift (per-element counts) when AVX512BW is available.
// If AVX512VL is missing, the 512-bit encoding is used regardless of the
// node's own vector length.
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    XMMRegister dst    = $dst$$XMMRegister;
    XMMRegister src    = $src$$XMMRegister;
    XMMRegister counts = $shift$$XMMRegister;
    int shift_op = this->ideal_Opcode();
    int enc = vector_length_encoding(this);
    bool has_vl = VM_Version::supports_avx512vl();
    if (!has_vl) {
      enc = Assembler::AVX_512bit;
    }
    __ varshiftw(shift_op, dst, src, counts, enc);
  %}
  ins_pipe( pipe_slow );
%}
21574
// Integer variable shift (per-element counts): left, arithmetic right and
// logical right, all emitted through varshiftd.
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    XMMRegister dst    = $dst$$XMMRegister;
    XMMRegister src    = $src$$XMMRegister;
    XMMRegister counts = $shift$$XMMRegister;
    int shift_op = this->ideal_Opcode();
    int enc = vector_length_encoding(this);
    __ varshiftd(shift_op, dst, src, counts, enc);
  %}
  ins_pipe( pipe_slow );
%}
21591
// Long variable shift (per-element counts): left and logical right, emitted
// through varshiftq. The arithmetic right shift has its own rules.
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    XMMRegister dst    = $dst$$XMMRegister;
    XMMRegister src    = $src$$XMMRegister;
    XMMRegister counts = $shift$$XMMRegister;
    int shift_op = this->ideal_Opcode();
    int enc = vector_length_encoding(this);
    __ varshiftq(shift_op, dst, src, counts, enc);
  %}
  ins_pipe( pipe_slow );
%}
21607
// Long variable arithmetic right shift (per-element counts) for UseAVX == 2
// and at most 4 elements; uses the varshiftq overload that takes a temporary.
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    XMMRegister dst     = $dst$$XMMRegister;
    XMMRegister src     = $src$$XMMRegister;
    XMMRegister counts  = $shift$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    int shift_op = this->ideal_Opcode();
    int enc = vector_length_encoding(this);
    __ varshiftq(shift_op, dst, src, counts, enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
21624
// Long variable arithmetic right shift (per-element counts) when UseAVX > 2;
// no temporary is needed.
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  // Spelling fixed ("varfshift" -> "varshift") to match the other long rules.
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21637
21638 // --------------------------------- AND --------------------------------------
21639
// Vector AND without AVX: two-operand pand, $dst is both input and result.
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ pand(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
21649
// Vector AND with AVX: three-operand vpand on registers.
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpand(result, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
21660
// Vector AND with AVX where the second operand is loaded from memory.
// Only matched when the first input is wider than 8 bytes.
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpand(result, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
21672
21673 // --------------------------------- OR ---------------------------------------
21674
// Vector OR without AVX: two-operand por, $dst is both input and result.
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ por(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
21684
// Vector OR with AVX: three-operand vpor on registers.
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpor(result, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
21695
// Vector OR with AVX where the second operand is loaded from memory.
// Only matched when the first input is wider than 8 bytes.
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpor(result, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
21707
21708 // --------------------------------- XOR --------------------------------------
21709
// Vector XOR without AVX: two-operand pxor, $dst is both input and result.
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ pxor(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
21719
// Vector XOR with AVX: three-operand vpxor on registers.
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpxor(result, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
21730
// Vector XOR with AVX where the second operand is loaded from memory.
// Only matched when the first input is wider than 8 bytes.
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpxor(result, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
21742
21743 // --------------------------------- VectorCast --------------------------------------
21744
// Byte vector cast to any element type. Casts to double are only matched
// here when AVX512VL is supported; otherwise vcastBtoD takes them.
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    XMMRegister dst = $dst$$XMMRegister;
    XMMRegister src = $src$$XMMRegister;
    BasicType target_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ vconvert_b2x(target_bt, dst, src, enc);
  %}
  ins_pipe( pipe_slow );
%}
21756
// Byte vector cast to double when AVX512VL is not supported; operands are
// restricted to the legVec register class.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    XMMRegister dst = $dst$$XMMRegister;
    XMMRegister src = $src$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vconvert_b2x(T_DOUBLE, dst, src, enc);
  %}
  ins_pipe( pipe_slow );
%}
21767
// Short vector cast to byte for sources of at most 8 elements when the
// AVX512 VL+BW down-convert is not usable: mask each short to its low byte,
// then pack the shorts down to bytes.
instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // Name the 128-bit encoding instead of passing a bare 0.
    int vlen_enc = Assembler::AVX_128bit;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21782
// Short vector cast to byte for a 16-element source when the AVX512 VL+BW
// down-convert is not usable: mask each short to its low byte, then pack
// the lower and upper 128-bit halves into one 128-bit byte vector.
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // The mask is applied at the source vector's width.
    int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    // 0x1 selects the upper 128-bit lane (a lane index, not a vector length).
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    // The pack works on 128-bit vectors; name the encoding instead of a bare 0.
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21800
// Short vector cast to byte, int, float, long or double. Matched when the
// AVX512 VL+BW features are usable, or when the destination vector is at
// least as wide as the source (all widening casts).
instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        // The down-convert is encoded with the source length, so that is the
        // value to widen when AVX512VL is absent (same as vcastItoX_evex).
        // The previous code stored to vlen_enc, which this case never reads.
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        // Sign-extend to int first, then convert int to float in place.
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        // The intermediate int vector is half as wide as the double result.
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
21839
// Int vector narrowing cast (to byte or short) for UseAVX <= 2 and sources
// of at most 16 bytes: mask each int to the target width, then pack down.
instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    XMMRegister dst = $dst$$XMMRegister;
    XMMRegister src = $src$$XMMRegister;
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_enc = vector_length_encoding(this, $src);

    if (to_elem_bt != T_BYTE) {
      // int -> short: one pack step.
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand(dst, src, ExternalAddress(vector_int_to_short_mask()), src_enc, noreg);
      __ vpackusdw(dst, dst, dst, src_enc);
    } else {
      // int -> byte: pack to shorts, then to bytes.
      __ vpand(dst, src, ExternalAddress(vector_int_to_byte_mask()), src_enc, noreg);
      __ vpackusdw(dst, dst, dst, src_enc);
      __ vpackuswb(dst, dst, dst, src_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21864
// Int vector narrowing cast (to byte or short) for UseAVX <= 2 and a 32-byte
// source: mask each int, pack the two 128-bit halves to shorts, and for a
// byte target pack once more.
instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    XMMRegister dst    = $dst$$XMMRegister;
    XMMRegister src    = $src$$XMMRegister;
    XMMRegister masked = $vtmp$$XMMRegister;
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_enc = vector_length_encoding(this, $src);
    bool to_byte = (to_elem_bt == T_BYTE);

    // Mask every int down to the target element's value range.
    if (to_byte) {
      __ vpand(masked, src, ExternalAddress(vector_int_to_byte_mask()), src_enc, noreg);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand(masked, src, ExternalAddress(vector_int_to_short_mask()), src_enc, noreg);
    }

    // Pack the lower half (in masked) with the extracted upper half.
    __ vextracti128(dst, masked, 0x1);
    __ vpackusdw(dst, masked, dst, src_enc);

    // A byte target needs a second pack, done on 128 bits.
    if (to_byte) {
      __ vpackuswb(dst, dst, dst, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
21892
// Int vector cast to byte, short, float, long or double. Matched when
// UseAVX > 2, or when the destination vector is at least as wide as the
// source. The narrowing casts are encoded with the source vector length,
// widened to 512 bits if AVX512VL is not supported.
instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    XMMRegister dst = $dst$$XMMRegister;
    XMMRegister src = $src$$XMMRegister;
    BasicType target_bt = Matcher::vector_element_basic_type(this);
    int src_enc = vector_length_encoding(this, $src);
    int dst_enc = vector_length_encoding(this);

    switch (target_bt) {
      case T_BYTE:
      case T_SHORT: {
        int narrow_enc = src_enc;
        if (!VM_Version::supports_avx512vl()) {
          narrow_enc = Assembler::AVX_512bit;
        }
        if (target_bt == T_BYTE) {
          __ evpmovdb(dst, src, narrow_enc);
        } else {
          __ evpmovdw(dst, src, narrow_enc);
        }
        break;
      }
      case T_FLOAT:
        __ vcvtdq2ps(dst, src, dst_enc);
        break;
      case T_LONG:
        __ vpmovsxdq(dst, src, dst_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd(dst, src, dst_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
21932
// Long vector cast to byte or short for UseAVX <= 2. The source is first
// shuffled into the low 128 bits of $dst, then masked and packed with
// 128-bit instructions; a byte target gets one extra pack step.
instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    XMMRegister dst = $dst$$XMMRegister;
    XMMRegister src = $src$$XMMRegister;
    int vlen = Matcher::vector_length_in_bytes(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                      : ExternalAddress(vector_int_to_short_mask());

    // Step 1: shuffle the source into the low part of dst.
    if (vlen > 16) {
      assert(vlen <= 32, "required");
      __ vpermilps(dst, src, 8, Assembler::AVX_256bit);
      __ vpermpd(dst, dst, 8, Assembler::AVX_256bit);
    } else {
      __ vpshufd(dst, src, 8, Assembler::AVX_128bit);
    }

    // Step 2: mask to the target range and pack ints down to shorts.
    __ vpand(dst, dst, mask_addr, Assembler::AVX_128bit, noreg);
    __ vpackusdw(dst, dst, dst, Assembler::AVX_128bit);

    // Step 3: a byte target needs one more pack.
    if (to_elem_bt == T_BYTE) {
      __ vpackuswb(dst, dst, dst, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
21962
21963 instruct vcastLtoX_evex(vec dst, vec src) %{
21964 predicate(UseAVX > 2 ||
21965 (Matcher::vector_element_basic_type(n) == T_INT ||
21966 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21967 Matcher::vector_element_basic_type(n) == T_DOUBLE));
21968 match(Set dst (VectorCastL2X src));
21969 format %{ "vector_cast_l2x $dst,$src\t!" %}
21970 ins_encode %{
21971 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21972 int vlen = Matcher::vector_length_in_bytes(this, $src);
21973 int vlen_enc = vector_length_encoding(this, $src);
21974 switch (to_elem_bt) {
21975 case T_BYTE:
21976 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21977 vlen_enc = Assembler::AVX_512bit;
21978 }
21979 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21980 break;
21981 case T_SHORT:
21982 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21983 vlen_enc = Assembler::AVX_512bit;
21984 }
21985 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21986 break;
21987 case T_INT:
21988 if (vlen == 8) {
21989 if ($dst$$XMMRegister != $src$$XMMRegister) {
21990 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21991 }
21992 } else if (vlen == 16) {
21993 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21994 } else if (vlen == 32) {
21995 if (UseAVX > 2) {
21996 if (!VM_Version::supports_avx512vl()) {
21997 vlen_enc = Assembler::AVX_512bit;
21998 }
21999 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22000 } else {
22001 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22002 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22003 }
22004 } else { // vlen == 64
22005 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22006 }
22007 break;
22008 case T_FLOAT:
22009 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22010 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22011 break;
22012 case T_DOUBLE:
22013 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22014 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22015 break;
22016
22017 default: assert(false, "%s", type2name(to_elem_bt));
22018 }
22019 %}
22020 ins_pipe( pipe_slow );
22021 %}
22022
22023 instruct vcastFtoD_reg(vec dst, vec src) %{
22024 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22025 match(Set dst (VectorCastF2X src));
22026 format %{ "vector_cast_f2d $dst,$src\t!" %}
22027 ins_encode %{
22028 int vlen_enc = vector_length_encoding(this);
22029 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22030 %}
22031 ins_pipe( pipe_slow );
22032 %}
22033
22034
22035 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22036 predicate(!VM_Version::supports_avx10_2() &&
22037 !VM_Version::supports_avx512vl() &&
22038 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22039 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22040 is_integral_type(Matcher::vector_element_basic_type(n)));
22041 match(Set dst (VectorCastF2X src));
22042 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22043 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22044 ins_encode %{
22045 int vlen_enc = vector_length_encoding(this, $src);
22046 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22047 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
22048 // 32 bit addresses for register indirect addressing mode since stub constants
22049 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
22050 // However, targets are free to increase this limit, but having a large code cache size
22051 // greater than 2G looks unreasonable in practical scenario, on the hind side with given
22052 // cap we save a temporary register allocation which in limiting case can prevent
22053 // spilling in high register pressure blocks.
22054 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22055 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22056 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22057 %}
22058 ins_pipe( pipe_slow );
22059 %}
22060
22061 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22062 predicate(!VM_Version::supports_avx10_2() &&
22063 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22064 is_integral_type(Matcher::vector_element_basic_type(n)));
22065 match(Set dst (VectorCastF2X src));
22066 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22067 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22068 ins_encode %{
22069 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22070 if (to_elem_bt == T_LONG) {
22071 int vlen_enc = vector_length_encoding(this);
22072 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22073 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22074 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22075 } else {
22076 int vlen_enc = vector_length_encoding(this, $src);
22077 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22078 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22079 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22080 }
22081 %}
22082 ins_pipe( pipe_slow );
22083 %}
22084
22085 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22086 predicate(VM_Version::supports_avx10_2() &&
22087 is_integral_type(Matcher::vector_element_basic_type(n)));
22088 match(Set dst (VectorCastF2X src));
22089 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22090 ins_encode %{
22091 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22092 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22093 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22094 %}
22095 ins_pipe( pipe_slow );
22096 %}
22097
22098 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22099 predicate(VM_Version::supports_avx10_2() &&
22100 is_integral_type(Matcher::vector_element_basic_type(n)));
22101 match(Set dst (VectorCastF2X (LoadVector src)));
22102 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22103 ins_encode %{
22104 int vlen = Matcher::vector_length(this);
22105 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22106 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22107 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22108 %}
22109 ins_pipe( pipe_slow );
22110 %}
22111
22112 instruct vcastDtoF_reg(vec dst, vec src) %{
22113 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22114 match(Set dst (VectorCastD2X src));
22115 format %{ "vector_cast_d2x $dst,$src\t!" %}
22116 ins_encode %{
22117 int vlen_enc = vector_length_encoding(this, $src);
22118 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22119 %}
22120 ins_pipe( pipe_slow );
22121 %}
22122
22123 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22124 predicate(!VM_Version::supports_avx10_2() &&
22125 !VM_Version::supports_avx512vl() &&
22126 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22127 is_integral_type(Matcher::vector_element_basic_type(n)));
22128 match(Set dst (VectorCastD2X src));
22129 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22130 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22131 ins_encode %{
22132 int vlen_enc = vector_length_encoding(this, $src);
22133 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22134 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22135 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22136 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22137 %}
22138 ins_pipe( pipe_slow );
22139 %}
22140
22141 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22142 predicate(!VM_Version::supports_avx10_2() &&
22143 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22144 is_integral_type(Matcher::vector_element_basic_type(n)));
22145 match(Set dst (VectorCastD2X src));
22146 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22147 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22148 ins_encode %{
22149 int vlen_enc = vector_length_encoding(this, $src);
22150 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22151 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22152 ExternalAddress(vector_float_signflip());
22153 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22154 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22155 %}
22156 ins_pipe( pipe_slow );
22157 %}
22158
22159 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22160 predicate(VM_Version::supports_avx10_2() &&
22161 is_integral_type(Matcher::vector_element_basic_type(n)));
22162 match(Set dst (VectorCastD2X src));
22163 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22164 ins_encode %{
22165 int vlen_enc = vector_length_encoding(this, $src);
22166 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22167 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22168 %}
22169 ins_pipe( pipe_slow );
22170 %}
22171
22172 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22173 predicate(VM_Version::supports_avx10_2() &&
22174 is_integral_type(Matcher::vector_element_basic_type(n)));
22175 match(Set dst (VectorCastD2X (LoadVector src)));
22176 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22177 ins_encode %{
22178 int vlen = Matcher::vector_length(this);
22179 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22180 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22181 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22182 %}
22183 ins_pipe( pipe_slow );
22184 %}
22185
22186 instruct vucast(vec dst, vec src) %{
22187 match(Set dst (VectorUCastB2X src));
22188 match(Set dst (VectorUCastS2X src));
22189 match(Set dst (VectorUCastI2X src));
22190 format %{ "vector_ucast $dst,$src\t!" %}
22191 ins_encode %{
22192 assert(UseAVX > 0, "required");
22193
22194 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22195 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22196 int vlen_enc = vector_length_encoding(this);
22197 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22198 %}
22199 ins_pipe( pipe_slow );
22200 %}
22201
22202 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22203 predicate(!VM_Version::supports_avx512vl() &&
22204 Matcher::vector_length_in_bytes(n) < 64 &&
22205 Matcher::vector_element_basic_type(n) == T_INT);
22206 match(Set dst (RoundVF src));
22207 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22208 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22209 ins_encode %{
22210 int vlen_enc = vector_length_encoding(this);
22211 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22212 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22213 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22214 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22215 %}
22216 ins_pipe( pipe_slow );
22217 %}
22218
22219 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22220 predicate((VM_Version::supports_avx512vl() ||
22221 Matcher::vector_length_in_bytes(n) == 64) &&
22222 Matcher::vector_element_basic_type(n) == T_INT);
22223 match(Set dst (RoundVF src));
22224 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22225 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22226 ins_encode %{
22227 int vlen_enc = vector_length_encoding(this);
22228 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22229 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22230 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22231 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22232 %}
22233 ins_pipe( pipe_slow );
22234 %}
22235
22236 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22237 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22238 match(Set dst (RoundVD src));
22239 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22240 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22241 ins_encode %{
22242 int vlen_enc = vector_length_encoding(this);
22243 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22244 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22245 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22246 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22247 %}
22248 ins_pipe( pipe_slow );
22249 %}
22250
22251 // --------------------------------- VectorMaskCmp --------------------------------------
22252
22253 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22254 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22255 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22256 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22257 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22258 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22259 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22260 ins_encode %{
22261 int vlen_enc = vector_length_encoding(this, $src1);
22262 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22263 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22264 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22265 } else {
22266 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22267 }
22268 %}
22269 ins_pipe( pipe_slow );
22270 %}
22271
22272 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22273 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22274 n->bottom_type()->isa_pvectmask() == nullptr &&
22275 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22276 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22277 effect(TEMP ktmp);
22278 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22279 ins_encode %{
22280 int vlen_enc = Assembler::AVX_512bit;
22281 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22282 KRegister mask = k0; // The comparison itself is not being masked.
22283 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22284 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22285 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22286 } else {
22287 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22288 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22289 }
22290 %}
22291 ins_pipe( pipe_slow );
22292 %}
22293
22294 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22295 predicate(n->bottom_type()->isa_pvectmask() &&
22296 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22297 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22298 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22299 ins_encode %{
22300 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22301 int vlen_enc = vector_length_encoding(this, $src1);
22302 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22303 KRegister mask = k0; // The comparison itself is not being masked.
22304 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22305 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22306 } else {
22307 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22308 }
22309 %}
22310 ins_pipe( pipe_slow );
22311 %}
22312
22313 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22314 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22315 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22316 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22317 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22318 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22319 (n->in(2)->get_int() == BoolTest::eq ||
22320 n->in(2)->get_int() == BoolTest::lt ||
22321 n->in(2)->get_int() == BoolTest::gt)); // cond
22322 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22323 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22324 ins_encode %{
22325 int vlen_enc = vector_length_encoding(this, $src1);
22326 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22327 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22328 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22329 %}
22330 ins_pipe( pipe_slow );
22331 %}
22332
22333 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22334 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22335 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22336 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22337 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22338 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22339 (n->in(2)->get_int() == BoolTest::ne ||
22340 n->in(2)->get_int() == BoolTest::le ||
22341 n->in(2)->get_int() == BoolTest::ge)); // cond
22342 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22343 effect(TEMP dst, TEMP xtmp);
22344 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22345 ins_encode %{
22346 int vlen_enc = vector_length_encoding(this, $src1);
22347 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22348 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22349 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22350 %}
22351 ins_pipe( pipe_slow );
22352 %}
22353
// Unsigned integral vector compare with a vector result, for source vectors
// of 4 to 32 bytes. Both operands are XORed with a per-lane high-bit constant
// before being handed to vpcmpCCW. dst and xtmp are TEMPs because both are
// written before the last read of the inputs.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    // Constant-table entry produced by high_bit_set() for the source lane type.
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    // Replicate the flip constant across xtmp; the instruction used depends on
    // whether the vector is 128 bits or wider.
    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    // dst  = src1 ^ flip
    // xtmp = src2 ^ flip (xtmp is overwritten, so the flip constant is gone after this)
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    // Compare the flipped operands; xtmp is passed both as the second operand
    // and as the scratch register of vpcmpCCW.
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22380
22381 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22382 predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
22383 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22384 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22385 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22386 effect(TEMP ktmp);
22387 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22388 ins_encode %{
22389 assert(UseAVX > 2, "required");
22390
22391 int vlen_enc = vector_length_encoding(this, $src1);
22392 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22393 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22394 KRegister mask = k0; // The comparison itself is not being masked.
22395 bool merge = false;
22396 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22397
22398 switch (src1_elem_bt) {
22399 case T_INT: {
22400 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22401 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22402 break;
22403 }
22404 case T_LONG: {
22405 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22406 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22407 break;
22408 }
22409 default: assert(false, "%s", type2name(src1_elem_bt));
22410 }
22411 %}
22412 ins_pipe( pipe_slow );
22413 %}
22414
22415
22416 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22417 predicate(n->bottom_type()->isa_pvectmask() &&
22418 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22419 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22420 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22421 ins_encode %{
22422 assert(UseAVX > 2, "required");
22423 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22424
22425 int vlen_enc = vector_length_encoding(this, $src1);
22426 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22427 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22428 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22429
22430 // Comparison i
22431 switch (src1_elem_bt) {
22432 case T_BYTE: {
22433 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22434 break;
22435 }
22436 case T_SHORT: {
22437 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22438 break;
22439 }
22440 case T_INT: {
22441 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22442 break;
22443 }
22444 case T_LONG: {
22445 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22446 break;
22447 }
22448 default: assert(false, "%s", type2name(src1_elem_bt));
22449 }
22450 %}
22451 ins_pipe( pipe_slow );
22452 %}
22453
22454 // Extract
22455
22456 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22457 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22458 match(Set dst (ExtractI src idx));
22459 match(Set dst (ExtractS src idx));
22460 match(Set dst (ExtractB src idx));
22461 format %{ "extractI $dst,$src,$idx\t!" %}
22462 ins_encode %{
22463 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22464
22465 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22466 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22467 %}
22468 ins_pipe( pipe_slow );
22469 %}
22470
22471 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22472 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22473 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22474 match(Set dst (ExtractI src idx));
22475 match(Set dst (ExtractS src idx));
22476 match(Set dst (ExtractB src idx));
22477 effect(TEMP vtmp);
22478 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22479 ins_encode %{
22480 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22481
22482 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22483 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22484 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22485 %}
22486 ins_pipe( pipe_slow );
22487 %}
22488
22489 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22490 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22491 match(Set dst (ExtractL src idx));
22492 format %{ "extractL $dst,$src,$idx\t!" %}
22493 ins_encode %{
22494 assert(UseSSE >= 4, "required");
22495 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22496
22497 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22498 %}
22499 ins_pipe( pipe_slow );
22500 %}
22501
22502 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22503 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22504 Matcher::vector_length(n->in(1)) == 8); // src
22505 match(Set dst (ExtractL src idx));
22506 effect(TEMP vtmp);
22507 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22508 ins_encode %{
22509 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22510
22511 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22512 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22513 %}
22514 ins_pipe( pipe_slow );
22515 %}
22516
22517 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22518 predicate(Matcher::vector_length(n->in(1)) <= 4);
22519 match(Set dst (ExtractF src idx));
22520 effect(TEMP dst, TEMP vtmp);
22521 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22522 ins_encode %{
22523 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22524
22525 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22526 %}
22527 ins_pipe( pipe_slow );
22528 %}
22529
22530 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22531 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22532 Matcher::vector_length(n->in(1)/*src*/) == 16);
22533 match(Set dst (ExtractF src idx));
22534 effect(TEMP vtmp);
22535 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22536 ins_encode %{
22537 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22538
22539 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22540 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22541 %}
22542 ins_pipe( pipe_slow );
22543 %}
22544
22545 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22546 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22547 match(Set dst (ExtractD src idx));
22548 format %{ "extractD $dst,$src,$idx\t!" %}
22549 ins_encode %{
22550 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22551
22552 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22553 %}
22554 ins_pipe( pipe_slow );
22555 %}
22556
22557 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22558 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22559 Matcher::vector_length(n->in(1)) == 8); // src
22560 match(Set dst (ExtractD src idx));
22561 effect(TEMP vtmp);
22562 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22563 ins_encode %{
22564 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22565
22566 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22567 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22568 %}
22569 ins_pipe( pipe_slow );
22570 %}
22571
22572 // --------------------------------- Vector Blend --------------------------------------
22573
22574 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22575 predicate(UseAVX == 0);
22576 match(Set dst (VectorBlend (Binary dst src) mask));
22577 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22578 effect(TEMP tmp);
22579 ins_encode %{
22580 assert(UseSSE >= 4, "required");
22581
22582 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22583 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22584 }
22585 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22586 %}
22587 ins_pipe( pipe_slow );
22588 %}
22589
// AVX blend for integral element types: vectors of at most 32 bytes whose
// mask lives in a vector register (not a predicate mask). Emits one vpblendvb.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister first  = $src1$$XMMRegister;
    XMMRegister second = $src2$$XMMRegister;
    __ vpblendvb($dst$$XMMRegister, first, second, $mask$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22603
// AVX blend for non-integral element types: same matching conditions as the
// integral variant except for the element-type test. Emits one vblendvps.
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister first  = $src1$$XMMRegister;
    XMMRegister second = $src2$$XMMRegister;
    __ vblendvps($dst$$XMMRegister, first, second, $mask$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22617
// Blend used when EnableX86ECoreOpts is set: instead of a variable-blend
// instruction the result is assembled from three logical operations,
//   dst = (mask & src2) | (~mask & src1)
// $dst is a TEMP because it is written before the last input is consumed.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP vtmp, TEMP dst);
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister scratch  = $vtmp$$XMMRegister;
    XMMRegister selector = $mask$$XMMRegister;
    __ vpandn(scratch, selector, $src1$$XMMRegister, enc);  // lanes taken from src1
    __ vpand(result, selector, $src2$$XMMRegister, enc);    // lanes taken from src2
    __ vpor(result, result, scratch, enc);                  // merge both halves
  %}
  ins_pipe( pipe_slow );
%}
22633
// 64-byte blend whose mask arrives in a vector register. The vector mask is
// first turned into an opmask by comparing it for equality against the
// all-bits-set constant (result in $ktmp); evpblend then selects between
// $src1 and $src2 under that opmask.
// The format string names $ktmp as the TEMP: the register allocator picks the
// opmask register, so printing a fixed "k2" was misleading.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22648
22649
// Blend whose mask is already a predicate (opmask) register. Sub-word element
// types additionally require AVX512BW. A single evpblend under $mask is
// emitted; no temporary is needed, so the format string no longer claims one
// (it previously printed "using k2 as TEMP" although the rule has no TEMP).
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22663
22664 // --------------------------------- ABS --------------------------------------
22665 // a = |a|
// Packed byte absolute value. Vectors of up to 16 elements use the
// two-operand pabsb; longer vectors use vpabsb with an explicit length.
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    if (num_elems > 16) {
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_length_encoding(this));
    } else {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22680
// Packed short absolute value. Vectors of up to 8 elements use the
// two-operand pabsw; longer vectors use vpabsw with an explicit length.
instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    if (num_elems > 8) {
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_length_encoding(this));
    } else {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22695
// Packed int absolute value. Vectors of up to 4 elements use the
// two-operand pabsd; longer vectors use vpabsd with an explicit length.
instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    if (num_elems > 4) {
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_length_encoding(this));
    } else {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22710
// Packed long absolute value via evpabsq (asserts UseAVX > 2). When AVX512VL
// is not available the instruction is always emitted at 512-bit length,
// whatever the node's own vector length is.
instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int enc = vector_length_encoding(this);
    const bool has_vl = VM_Version::supports_avx512vl();
    if (!has_vl) {
      enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22724
22725 // --------------------------------- ABSNEG --------------------------------------
22726
// Abs/Neg of packed floats for every vector length except 4 (that case is
// matched by vabsneg4F). The ideal opcode tells the macro assembler whether
// to emit the abs or the neg flavour. Two-element vectors take the overload
// without a length encoding; 8- and 16-element vectors pass one.
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  ins_cost(150);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this);
    if (num_elems != 2) {
      assert(num_elems == 8 || num_elems == 16, "required");
      __ vabsnegf(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, vector_length_encoding(this));
    } else {
      __ vabsnegf(ideal_op, $dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22746
// Abs/Neg of exactly four packed floats, done in place: $dst is both the
// input and the output of the match rule.
instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  ins_cost(150);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_encode %{
    XMMRegister in_out = $dst$$XMMRegister;
    __ vabsnegf(this->ideal_Opcode(), in_out, in_out);
  %}
  ins_pipe( pipe_slow );
%}
22759
// Abs/Neg of packed doubles. Two-element vectors use the overload without a
// length encoding; every other length passes the node's encoding explicitly.
instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    const int  ideal_op  = this->ideal_Opcode();
    const uint num_elems = Matcher::vector_length(this);
    if (num_elems != 2) {
      __ vabsnegd(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, vector_length_encoding(this));
    } else {
      __ vabsnegd(ideal_op, $dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22776
22777 //------------------------------------- VectorTest --------------------------------------------
22778
// VectorTest on vector-register operands shorter than 16 bytes. The macro
// assembler is given $vtmp as a scratch register for this short case.
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    const BasicType elem_type  = Matcher::vector_element_basic_type(this, $src1);
    const int       size_bytes = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(elem_type, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, size_bytes);
  %}
  ins_pipe( pipe_slow );
%}
22791
// VectorTest on vector-register operands of 16 bytes or more. No scratch
// register is needed, so xnoreg is passed in the temp slot.
// The format string no longer ends in "\n\t": that was a leftover line
// continuation that printed an empty line after the instruction, and the
// sibling vptest_lt16 has none.
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}
22803
// "All true" VectorTest (predicate BoolTest::overflow) on opmask operands
// with fewer than 8 lanes, or exactly 8 lanes when AVX512DQ is missing.
// The opmask is copied to a GPR, trimmed to the low masklen bits and compared
// against the all-ones pattern of that width; only $src1 is inspected.
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    const uint num_lanes = Matcher::vector_length(this, $src1);
    const int  lane_bits = (1 << num_lanes) - 1;
    Register scratch = $tmp$$Register;
    __ kmovwl(scratch, $src1$$KRegister);
    __ andl(scratch, lane_bits);
    __ cmpl(scratch, lane_bits);
  %}
  ins_pipe( pipe_slow );
%}
22819
// "Any true" VectorTest (predicate BoolTest::ne) on opmask operands with
// fewer than 8 lanes, or exactly 8 lanes when AVX512DQ is missing.
// The opmask is copied to a GPR and ANDed with the low masklen bits; the
// flags produced by that AND are the result. Only $src1 is inspected.
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    const uint num_lanes = Matcher::vector_length(this, $src1);
    const int  lane_bits = (1 << num_lanes) - 1;
    Register scratch = $tmp$$Register;
    __ kmovwl(scratch, $src1$$KRegister);
    __ andl(scratch, lane_bits);
  %}
  ins_pipe( pipe_slow );
%}
22834
// VectorTest on opmask operands with 16 or more lanes, or exactly 8 lanes
// when AVX512DQ is available. kortest is issued with $src1 as both operands,
// so the flags reflect $src1 alone.
// The format string no longer ends in "\n\t": that was a leftover line
// continuation that printed an empty line after the instruction, and the
// sibling ktest_* rules have none.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22846
22847 //------------------------------------- LoadMask --------------------------------------------
22848
// VectorLoadMask producing a vector-register mask (the result type is not a
// predicate mask) on CPUs without AVX512VL+BW. $dst is a TEMP so that it is
// not allocated on top of $src.
// The format string no longer ends in "\n\t": that was a leftover line
// continuation that printed an empty line after the instruction, and the
// sibling loadMask64 / loadMask_evex rules have none.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
22861
// VectorLoadMask producing a predicate (opmask) result on CPUs without
// AVX512VL+BW. The helper is always invoked at 512-bit length and receives
// $xtmp as scratch.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  effect(TEMP xtmp);
  ins_encode %{
    KRegister   result  = $dst$$KRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ load_vector_mask(result, $src$$XMMRegister, scratch, true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
22873
// VectorLoadMask producing a predicate (opmask) result on CPUs that have
// AVX512VL+BW. The length encoding is derived from the input node, in(1),
// rather than from this node.
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  effect(TEMP xtmp);
  ins_encode %{
    const int src_enc = vector_length_encoding(in(1));
    KRegister   result  = $dst$$KRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ load_vector_mask(result, $src$$XMMRegister, scratch, false, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
22886
22887 //------------------------------------- StoreMask --------------------------------------------
22888
// VectorStoreMask for 1-byte elements held in a vector register (fewer than
// 64 lanes). A byte-wise absolute value is the whole conversion: the SSE form
// is used for up to 16 lanes when UseAVX <= 2, the AVX form otherwise, sized
// by the source vector.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int  num_lanes = Matcher::vector_length(this);
    const bool use_sse   = (num_lanes <= 16 && UseAVX <= 2);
    if (use_sse) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      const int src_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22906
// VectorStoreMask for 2-byte elements held in a vector register (at most 16
// lanes). Up to 8 lanes: word-wise abs, then pack words to bytes against a
// zeroed $xtmp. 16 lanes: pull the upper 128 bits of $src into $dst, pack both
// halves to bytes with signed saturation, then take the byte-wise abs.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    const int num_lanes = Matcher::vector_length(this);
    if (num_lanes <= 8) {
      assert(UseSSE >= 3, "required");
      XMMRegister zero = $xtmp$$XMMRegister;
      __ pxor(zero, zero);
      __ pabsw(result, input);
      __ packuswb(result, zero);
    } else {
      assert(UseAVX > 0, "required");
      const int enc128 = Assembler::AVX_128bit;
      __ vextracti128(result, input, 0x1);
      __ vpacksswb(result, input, result, enc128);
      __ vpabsb(result, result, enc128);
    }
  %}
  ins_pipe( pipe_slow );
%}
22929
// VectorStoreMask for 4-byte elements held in a vector register, pre-AVX512
// (UseAVX <= 2, at most 8 lanes). Up to 4 lanes: dword abs, then two pack
// steps (dword->word, word->byte) against a zeroed $xtmp. 8 lanes: pull the
// upper 128 bits of $src into $dst, pack dwords to words across both halves,
// pack words to bytes against zero, then take the byte-wise abs.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister zero   = $xtmp$$XMMRegister;
    const int num_lanes = Matcher::vector_length(this);
    if (num_lanes <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor(zero, zero);
      __ pabsd(result, input);
      __ packusdw(result, zero);
      __ packuswb(result, zero);
    } else {
      assert(UseAVX > 0, "required");
      const int enc128 = Assembler::AVX_128bit;
      __ vpxor(zero, zero, zero, enc128);
      __ vextracti128(result, input, 0x1);
      __ vpackssdw(result, input, result, enc128);
      __ vpacksswb(result, result, zero, enc128);
      __ vpabsb(result, result, enc128);
    }
  %}
  ins_pipe( pipe_slow );
%}
22955
// VectorStoreMask for 8-byte elements, two lanes, pre-AVX512 (UseAVX <= 2).
// pshufd with selector 0x8 gathers dwords 0 and 2 (the low dword of each
// qword) into the bottom of $dst; dword abs and two pack steps against a
// zeroed $xtmp then reduce them to bytes.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    assert(UseSSE >= 3, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister zero   = $xtmp$$XMMRegister;
    __ pxor(zero, zero);
    __ pshufd(result, $src$$XMMRegister, 0x8);
    __ pabsd(result, result);
    __ packusdw(result, zero);
    __ packuswb(result, zero);
  %}
  ins_pipe( pipe_slow );
%}
22971
// VectorStoreMask for 8-byte elements, four lanes, pre-AVX512 (UseAVX <= 2).
// A 256-bit vshufps (0x88) compacts the even dwords inside each 128-bit half,
// the upper half is extracted into $vtmp and blended into the low half, and
// the resulting four dwords are packed down to bytes against a zeroed $vtmp
// before the final byte-wise abs.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP vtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  ins_encode %{
    const int enc128 = Assembler::AVX_128bit;
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ vshufps(result, input, input, 0x88, Assembler::AVX_256bit);
    __ vextracti128(scratch, result, 0x1);
    __ vblendps(result, result, scratch, 0xC, enc128);
    // scratch is reused as the zero operand for the pack steps
    __ vpxor(scratch, scratch, scratch, enc128);
    __ vpackssdw(result, result, scratch, enc128);
    __ vpacksswb(result, result, scratch, enc128);
    __ vpabsb(result, result, enc128);
  %}
  ins_pipe( pipe_slow );
%}
22989
// VectorStoreMask for 4-byte elements held in a vector register on AVX-512
// (UseAVX > 2). evpmovdb narrows dwords to bytes at the source length (forced
// to 512 bits when AVX512VL is absent); vpabsb then runs at the destination
// length.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int narrow_enc = vector_length_encoding(this, $src);
    const int out_enc = vector_length_encoding(this);
    const bool has_vl = VM_Version::supports_avx512vl();
    if (!has_vl) {
      narrow_enc = Assembler::AVX_512bit;
    }
    XMMRegister result = $dst$$XMMRegister;
    __ evpmovdb(result, $src$$XMMRegister, narrow_enc);
    __ vpabsb(result, result, out_enc);
  %}
  ins_pipe( pipe_slow );
%}
23005
// VectorStoreMask for 8-byte elements held in a vector register on AVX-512
// (UseAVX > 2). evpmovqb narrows qwords to bytes at the source length (forced
// to 512 bits when AVX512VL is absent); vpabsb then runs at the destination
// length.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int narrow_enc = vector_length_encoding(this, $src);
    const int out_enc = vector_length_encoding(this);
    const bool has_vl = VM_Version::supports_avx512vl();
    if (!has_vl) {
      narrow_enc = Assembler::AVX_512bit;
    }
    XMMRegister result = $dst$$XMMRegister;
    __ evpmovqb(result, $src$$XMMRegister, narrow_enc);
    __ vpabsb(result, result, out_enc);
  %}
  ins_pipe( pipe_slow );
%}
23021
// VectorStoreMask from a predicate (opmask) register on CPUs without
// AVX512VL+BW. Asserts that the mask's vector is 64 bytes. A masked 512-bit
// dword load from the vector_int_mask_cmp_bits() constant materializes the
// mask in $dst, and evpmovdb narrows those dwords to bytes.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    const int enc512 = Assembler::AVX_512bit;
    XMMRegister result = $dst$$XMMRegister;
    __ evmovdqul(result, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, enc512, noreg);
    __ evpmovdb(result, result, enc512);
  %}
  ins_pipe( pipe_slow );
%}
23035
// VectorStoreMask from a predicate (opmask) register on CPUs with
// AVX512VL+BW: evpmovm2b expands the opmask into bytes and vpabsb takes their
// absolute value, both at the destination length.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst);
  ins_encode %{
    const int out_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ evpmovm2b(result, $mask$$KRegister, out_enc);
    __ vpabsb(result, result, out_enc);
  %}
  ins_pipe( pipe_slow );
%}
23048
// VectorMaskCast on a predicate (opmask) register: matched in place on $dst
// at zero cost, with nothing emitted.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // intentionally no code
  %}
  ins_pipe(empty);
%}
23058
// VectorMaskCast on a vector register when input and output occupy the same
// number of bytes: matched in place on $dst at zero cost, with nothing emitted.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // intentionally no code
  %}
  ins_pipe(empty);
%}
23069
// VectorMaskCast on a vector register when input and output differ in byte
// size: the macro assembler converts between the two element types for the
// node's lane count.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    const BasicType to_type   = Matcher::vector_element_basic_type(this);
    const BasicType from_type = Matcher::vector_element_basic_type(this, $src);
    const int       num_lanes = Matcher::vector_length(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_type, from_type, num_lanes);
  %}
  ins_pipe(pipe_slow);
%}
23082
23083 //-------------------------------- Load Iota Indices ----------------------------------
23084
// VectorLoadConst with a zero immediate: loads the iota index constant for
// this node's byte size and element type into $dst.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    const BasicType elem_type  = Matcher::vector_element_basic_type(this);
    const int       size_bytes = Matcher::vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, size_bytes, elem_type);
  %}
  ins_pipe( pipe_slow );
%}
23095
// PopulateIndex with an int start value and a step fixed at 1: broadcast the
// start value into $vtmp, load the iota indices into $dst, and add the two.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const int       size_bytes = Matcher::vector_length_in_bytes(this);
    const int       enc        = vector_length_encoding(this);
    const BasicType elem_type  = Matcher::vector_element_basic_type(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister splat  = $vtmp$$XMMRegister;
    __ vpbroadcast(elem_type, splat, $src1$$Register, enc);
    __ load_iota_indices(result, size_bytes, elem_type);
    __ vpadd(elem_type, result, result, splat, enc);
  %}
  ins_pipe( pipe_slow );
%}
23111
// PopulateIndex with a long start value and a step fixed at 1: broadcast the
// start value into $vtmp, load the iota indices into $dst, and add the two.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const int       size_bytes = Matcher::vector_length_in_bytes(this);
    const int       enc        = vector_length_encoding(this);
    const BasicType elem_type  = Matcher::vector_element_basic_type(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister splat  = $vtmp$$XMMRegister;
    __ vpbroadcast(elem_type, splat, $src1$$Register, enc);
    __ load_iota_indices(result, size_bytes, elem_type);
    __ vpadd(elem_type, result, result, splat, enc);
  %}
  ins_pipe( pipe_slow );
%}
23127
23128 //-------------------------------- Rearrange ----------------------------------
23129
23130 // LoadShuffle/Rearrange for Byte
// Byte rearrange for vectors of fewer than 32 elements: an in-place pshufb of
// $dst driven by $shuffle.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister in_out = $dst$$XMMRegister;
    __ pshufb(in_out, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23142
// 32-element byte rearrange on AVX2 without AVX512_VBMI. vpshufb only
// shuffles within each 128-bit lane, so the result is built from two
// shuffles: one of the original source and one of the source with its lanes
// swapped. A blend mask derived from the shuffle indices picks, per byte,
// which of the two to keep.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc256 = Assembler::AVX_256bit;
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister input      = $src$$XMMRegister;
    XMMRegister indices    = $shuffle$$XMMRegister;
    XMMRegister swapped    = $vtmp1$$XMMRegister;
    XMMRegister blend_mask = $vtmp2$$XMMRegister;

    // swapped = input with its two 128-bit lanes exchanged
    __ vperm2i128(swapped, input, input, 1);
    // Bytes that must come from the opposite lane
    __ vpshufb(swapped, swapped, indices, enc256);
    // Bytes that come from the same lane
    __ vpshufb(result, input, indices, enc256);
    // Adding the shufflemask constant sets the high bit of every index that
    // refers to the opposite lane
    __ vpaddb(blend_mask, indices, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // Take the cross-lane byte wherever the blend mask says so
    __ vpblendvb(result, result, swapped, blend_mask, enc256);
  %}
  ins_pipe( pipe_slow );
%}
23164
23165
// Byte rearrange for vectors of more than 32 elements on CPUs without
// AVX512_VBMI. The work is delegated to the rearrange_bytes() macro-assembler
// helper, which is handed three vector temps, one GPR temp and one opmask
// temp.
// Format string fix: the comment marker was written as "!\t using", gluing
// the '!' onto $src; it now reads "\t! using" like every other rule here.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23180
// Byte rearrange for vectors of 32 or more elements when AVX512_VBMI is
// available: a single vpermb with $shuffle as the index operand.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister indices = $shuffle$$XMMRegister;
    __ vpermb($dst$$XMMRegister, indices, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23192
23193 // LoadShuffle/Rearrange for Short
23194
// VectorLoadShuffle for short elements on CPUs without AVX512BW. Only a byte
// shuffle instruction is available there, so every short index i is expanded
// into the byte-index pair derived from 2*i: the doubled index is replicated
// into both bytes of the word and the vector_short_shufflemask() constant is
// added to offset the alternate byte.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    const int size_bytes = Matcher::vector_length_in_bytes(this);

    if (UseAVX != 0) {
      assert(UseAVX > 1 || size_bytes <= 16, "required");
      const int enc = vector_length_encoding(this);
      // scratch = 2 * index (byte offset of the short)
      __ vpsllw(scratch, input, 1, enc);

      // Replicate the byte offset into both bytes of each word
      __ vpsllw(result, scratch, 8, enc);
      __ vpor(result, result, scratch, enc);

      // Offset the alternate byte of each pair
      __ vpaddb(result, result, ExternalAddress(vector_short_shufflemask()), enc, noreg);
    } else {
      assert(size_bytes <= 16, "required");
      // scratch = 2 * index (byte offset of the short)
      __ movdqu(scratch, input);
      __ psllw(scratch, 1);

      // Replicate the byte offset into both bytes of each word
      __ movdqu(result, scratch);
      __ psllw(result, 8);
      __ por(result, scratch);

      // Offset the alternate byte of each pair
      __ movdqu(scratch, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb(result, scratch);
    }
  %}
  ins_pipe( pipe_slow );
%}
23235
// Short rearrange for vectors of at most 8 elements on CPUs without
// AVX512BW: an in-place pshufb of $dst driven by $shuffle.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister in_out = $dst$$XMMRegister;
    __ pshufb(in_out, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23247
// 16-element short rearrange on AVX2 without AVX512BW. vpshufb only shuffles
// within each 128-bit lane, so the result is built from two shuffles: one of
// the original source and one of the source with its lanes swapped. A blend
// mask derived from the shuffle indices picks, per byte, which of the two to
// keep.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc256 = Assembler::AVX_256bit;
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister input      = $src$$XMMRegister;
    XMMRegister indices    = $shuffle$$XMMRegister;
    XMMRegister swapped    = $vtmp1$$XMMRegister;
    XMMRegister blend_mask = $vtmp2$$XMMRegister;

    // swapped = input with its two 128-bit lanes exchanged
    __ vperm2i128(swapped, input, input, 1);
    // Bytes that must come from the opposite lane
    __ vpshufb(swapped, swapped, indices, enc256);
    // Bytes that come from the same lane
    __ vpshufb(result, input, indices, enc256);
    // Adding the shufflemask constant sets the high bit of every index that
    // refers to the opposite lane
    __ vpaddb(blend_mask, indices, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // Take the cross-lane byte wherever the blend mask says so
    __ vpblendvb(result, result, swapped, blend_mask, enc256);
  %}
  ins_pipe( pipe_slow );
%}
23269
// Short rearrange when AVX512BW is available: a single vpermw with $shuffle
// as the index operand. Without AVX512VL the instruction is always emitted at
// 512-bit length.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    const bool has_vl = VM_Version::supports_avx512vl();
    if (!has_vl) {
      enc = Assembler::AVX_512bit;
    }
    XMMRegister indices = $shuffle$$XMMRegister;
    __ vpermw($dst$$XMMRegister, indices, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23284
23285 // LoadShuffle/Rearrange for Integer and Float
23286
// VectorLoadShuffle for four int/float elements without AVX. Only a byte
// shuffle instruction is available there, so every int index i is expanded
// into four byte indices derived from 4*i. Note that the roles of $dst and
// $vtmp alternate: the combined byte offsets end up in $vtmp and the final
// sum is produced in $dst.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;

    // Copy each index into both words of its dword, then scale by 4
    __ movdqu(scratch, $src$$XMMRegister);
    __ pshuflw(scratch, scratch, 0xA0);
    __ pshufhw(scratch, scratch, 0xA0);
    __ psllw(scratch, 2);

    // Replicate the scaled index into both bytes of every word, giving four
    // copies of the byte offset per element (accumulated in scratch)
    __ movdqu(result, scratch);
    __ psllw(result, 8);
    __ por(scratch, result);

    // Add the per-byte offsets from the int shufflemask constant
    __ movdqu(result, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb(result, scratch);
  %}
  ins_pipe( pipe_slow );
%}
23316
// Int/float rearrange without AVX: an in-place pshufb of $dst driven by
// $shuffle.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister in_out = $dst$$XMMRegister;
    __ pshufb(in_out, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23328
// VectorRearrange for INT/FLOAT vectors when UseAVX > 0.
// Three-operand form; the work is delegated to the macro assembler helper
// vector_rearrange_int_float, keyed on the element type of this node.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ vector_rearrange_int_float(elem_bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23341
23342 // LoadShuffle/Rearrange for Long and Double
23343
// VectorLoadShuffle for LONG/DOUBLE vectors with fewer than 8 elements when
// AVX512VL is not available. Converts the long-granular shuffle in $src into
// a double-word shuffle in $dst, because only a double word shuffle
// instruction is available on these platforms.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int vec_enc = vector_length_encoding(this);
    XMMRegister dword_shuffle = $dst$$XMMRegister;
    XMMRegister doubled       = $vtmp$$XMMRegister;

    // Multiply each shuffle index by two to get a double word index.
    __ vpsllq(doubled, $src$$XMMRegister, 1, vec_enc);

    // Duplicate each double word index into the upper half of its lane.
    __ vpsllq(dword_shuffle, doubled, 32, vec_enc);
    __ vpor(dword_shuffle, dword_shuffle, doubled, vec_enc);

    // Add one (via vector_long_shufflemask) to get the alternate double
    // word index.
    __ vpaddd(dword_shuffle, dword_shuffle, ExternalAddress(vector_long_shufflemask()), vec_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23369
// VectorRearrange for LONG/DOUBLE vectors with fewer than 8 elements when
// AVX512VL is not available. Emits vpermd with the shuffle as the index
// operand.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int vec_enc = vector_length_encoding(this);
    XMMRegister indices = $shuffle$$XMMRegister;
    __ vpermd($dst$$XMMRegister, indices, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23383
// VectorRearrange for LONG/DOUBLE vectors when the vector has 8 elements or
// AVX512VL is available. Emits vpermq; a 128-bit length encoding is widened
// to 256-bit before the instruction is emitted.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int natural_enc = vector_length_encoding(this);
    // Never emit vpermq with the 128-bit encoding; use 256-bit instead.
    const int vec_enc = (natural_enc == Assembler::AVX_128bit) ? (int)Assembler::AVX_256bit : natural_enc;
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23400
23401 // --------------------------------- FMA --------------------------------------
23402 // a * b + c
23403
// Packed float fused multiply-add, register operands: c = a * b + c.
// $c is both the addend and the destination.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    XMMRegister acc = $c$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vfmaf(acc, $a$$XMMRegister, $b$$XMMRegister, acc, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23415
// Packed float fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only matched when the vector is wider than
// 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    XMMRegister acc = $c$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vfmaf(acc, $a$$XMMRegister, $b$$Address, acc, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23428
// Packed double fused multiply-add, register operands: c = a * b + c.
// $c is both the addend and the destination.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    XMMRegister acc = $c$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vfmad(acc, $a$$XMMRegister, $b$$XMMRegister, acc, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23440
// Packed double fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only matched when the vector is wider than
// 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    XMMRegister acc = $c$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vfmad(acc, $a$$XMMRegister, $b$$Address, acc, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23453
23454 // --------------------------------- Vector Multiply Add --------------------------------------
23455
// MulAddVS2VI (packed short -> int multiply-add) when UseAVX == 0.
// Two-operand form: $dst is the first input and the destination.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    XMMRegister acc   = $dst$$XMMRegister;
    XMMRegister other = $src1$$XMMRegister;
    __ pmaddwd(acc, other);
  %}
  ins_pipe( pipe_slow );
%}
23465
// MulAddVS2VI (packed short -> int multiply-add) when UseAVX > 0.
// Three-operand form using vpmaddwd.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpmaddwd($dst$$XMMRegister, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23476
23477 // --------------------------------- Vector Multiply Add Add ----------------------------------
23478
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into a single evpdpwssd when
// AVX512 VNNI is supported. The low ins_cost(10) is declared at the end of
// the rule, as in the original.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    __ evpdpwssd(acc, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
23491
23492 // --------------------------------- PopCount --------------------------------------
23493
// Unmasked PopCountVI / PopCountVL for element types accepted by
// is_vector_popcount_predicate. Delegates to vector_popcount_integral_evex
// with k0 as the mask register and the merge flag set to true.
// Element type and length encoding are taken from the $src operand.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    // The ideal opcode is not needed here: the helper dispatches on the
    // element type only, so the previously unused 'opcode' local is gone.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23507
// Masked PopCountVI / PopCountVL for element types accepted by
// is_vector_popcount_predicate. $src is first copied into $dst, then the
// popcount helper is invoked under $mask with the merge flag set to true.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    // Seed the destination with the source before the masked operation.
    __ evmovdquq(result, input, vec_enc);
    __ vector_popcount_integral_evex(elem_bt, result, input, $mask$$KRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23521
// PopCountVI / PopCountVL fallback for element types rejected by
// is_vector_popcount_predicate. Delegates to vector_popcount_integral, which
// needs two vector temporaries and one general-purpose temporary.
// Element type and length encoding are taken from the $src operand.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    // The helper does not take the ideal opcode, so the previously unused
    // 'opcode' local has been removed.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23537
23538 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23539
// CountTrailingZerosV for inputs accepted by
// is_clz_non_subword_predicate_evex. Only one vector temporary is supplied
// to the helper; the other three vector temp slots are passed as xnoreg and
// the mask slot as k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    XMMRegister vtmp = $xtmp$$XMMRegister;
    Register    gtmp = $rtmp$$Register;
    __ vector_count_trailing_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, vtmp, k0, gtmp, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23555
// CountTrailingZerosV for SHORT elements when AVX512CD is supported and
// either AVX512VL is available or the vector is 64 bytes long. Three vector
// temporaries are passed to the helper; its third vector temp slot is
// xnoreg and the mask slot is k0.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    XMMRegister vtmp_c = $xtmp3$$XMMRegister;
    __ vector_count_trailing_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vtmp_a,
                                        vtmp_b, xnoreg, vtmp_c, k0, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23572
// CountTrailingZerosV for BYTE elements when AVX512VL+BW is supported.
// All four vector temporaries, a mask temporary and a general-purpose
// temporary are handed to the helper.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    XMMRegister vtmp_c = $xtmp3$$XMMRegister;
    XMMRegister vtmp_d = $xtmp4$$XMMRegister;
    __ vector_count_trailing_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vtmp_a,
                                        vtmp_b, vtmp_c, vtmp_d,
                                        $ktmp$$KRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23588
// CountTrailingZerosV when AVX512VL is not available and the input vector
// is shorter than 64 bytes. Delegates to vector_count_trailing_zeros_avx
// with three vector temporaries and one general-purpose temporary.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    XMMRegister vtmp_c = $xtmp3$$XMMRegister;
    __ vector_count_trailing_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vtmp_a,
                                       vtmp_b, vtmp_c, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23602
23603
23604 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23605
// MacroLogicV with three register inputs. $dst is the first input and the
// destination; $func is the 8-bit immediate passed to vpternlogd.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister second = $src2$$XMMRegister;
    XMMRegister third  = $src3$$XMMRegister;
    __ vpternlogd($dst$$XMMRegister, $func$$constant, second, third, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23616
// MacroLogicV with the third input loaded from memory. Only matched when
// the vector is wider than 8 bytes. $dst is the first input and the
// destination.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister second = $src2$$XMMRegister;
    __ vpternlogd($dst$$XMMRegister, $func$$constant, second, $src3$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23628
23629 // --------------------------------- Rotation Operations ----------------------------------
// RotateLeftV / RotateRightV by an 8-bit immediate. The ideal opcode tells
// vprotate_imm which of the two matched rotations to perform.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    const int vec_enc   = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vprotate_imm(ideal_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23642
// RotateLeftV / RotateRightV with a per-lane rotate amount held in a vector
// register. The ideal opcode tells vprotate_var which of the two matched
// rotations to perform.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    const int vec_enc   = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vprotate_var(ideal_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23655
23656 // ---------------------------------- Masked Operations ------------------------------------
// LoadVectorMasked when the mask input is not a predicate-register mask
// (isa_pvectmask() is null), i.e. the mask lives in a vector register.
// Emitted through vmovmask.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    XMMRegister lane_mask = $mask$$XMMRegister;
    __ vmovmask(elem_bt, $dst$$XMMRegister, $mem$$Address, lane_mask, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23668
23669
// LoadVectorMasked when the mask input is a predicate-register mask
// (isa_pvectmask() is non-null). Emitted through evmovdqu with the merge
// flag set to false.
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    KRegister lane_mask = $mask$$KRegister;
    __ evmovdqu(elem_bt, lane_mask, $dst$$XMMRegister, $mem$$Address, false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23681
// StoreVectorMasked when the mask input is not a predicate-register mask.
// A store has no vector result of its own, so element type and length
// encoding are read from the node feeding the $src operand.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* value_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elem_bt = value_node->bottom_type()->is_vect()->element_basic_type();
    const int vec_enc = vector_length_encoding(value_node);
    XMMRegister lane_mask = $mask$$XMMRegister;
    __ vmovmask(elem_bt, $mem$$Address, $src$$XMMRegister, lane_mask, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23694
// StoreVectorMasked when the mask input is a predicate-register mask.
// Element type and length encoding are read from the node feeding the $src
// operand; evmovdqu is called with the merge flag set to true.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* value_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    const int vec_enc = vector_length_encoding(value_node);
    BasicType elem_bt = value_node->bottom_type()->is_vect()->element_basic_type();
    KRegister lane_mask = $mask$$KRegister;
    __ evmovdqu(elem_bt, lane_mask, $mem$$Address, $src$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23707
// VerifyVectorAlignment: tests $addr against the immediate $mask and stops
// the VM with a diagnostic message if any masked bit is set. $addr passes
// through unchanged; the flags register is clobbered by the test.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label aligned;
    Register address = $addr$$Register;
    // All masked address bits zero => aligned, skip the stop().
    __ testq(address, $mask$$constant);
    __ jccb(Assembler::equal, aligned);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(aligned);
  %}
  ins_pipe(pipe_slow);
%}
23722
// VectorCmpMasked: compares $src1 and $src2 for equality under $mask.
// $dst is preloaded with -1; if the carry-set branch after kortestql is
// taken that value is kept, otherwise $dst receives tzcnt(~ktmp1), where
// ktmp1 is the equality mask produced by evpcmp.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    BasicType cmp_bt  = Matcher::vector_element_basic_type(this, $src1);
    const int vec_enc = vector_length_encoding(this, $src1);

    Register  result        = $dst$$Register;
    KRegister active        = $mask$$KRegister;
    KRegister equal_lanes   = $ktmp1$$KRegister;
    KRegister inactive      = $ktmp2$$KRegister;
    Label     keep_minus_one;

    // inactive = ~mask; result defaults to -1.
    __ knotql(inactive, active);
    __ mov64(result, -1L);
    // equal_lanes = (src1 == src2) under the active mask.
    __ evpcmp(cmp_bt, equal_lanes, active, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vec_enc);
    __ kortestql(inactive, equal_lanes);
    __ jccb(Assembler::carrySet, keep_minus_one);
    // Otherwise: index of the lowest clear bit of equal_lanes.
    __ kmovql(result, equal_lanes);
    __ notq(result);
    __ tzcntq(result, result);
    __ bind(keep_minus_one);
  %}
  ins_pipe( pipe_slow );
%}
23747
23748
// VectorMaskGen with a run-time length held in a register. Delegates to
// genmask, using $temp as a scratch register; flags are clobbered.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    Register length  = $len$$Register;
    Register scratch = $temp$$Register;
    __ genmask($dst$$KRegister, length, scratch);
  %}
  ins_pipe( pipe_slow );
%}
23758
// VectorMaskGen with a compile-time constant length.
// A non-positive length clears $dst with kxorql; otherwise
// right_n_bits($len) is materialized in $temp and moved into $dst.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    KRegister result = $dst$$KRegister;
    if ($len$$constant <= 0) {
      // Empty mask: dst ^ dst == 0.
      __ kxorql(result, result, result);
    } else {
      Register scratch = $temp$$Register;
      __ mov64(scratch, right_n_bits($len$$constant));
      __ kmovql(result, scratch);
    }
  %}
  ins_pipe( pipe_slow );
%}
23773
// VectorMaskToLong for a predicate-register mask. $dst is also passed as
// the helper's temporary register. The mask size handed to the helper is
// lane count times element size in bytes.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int mask_bytes = lane_count * type2aelembytes(mask_bt);
    Register result = $dst$$Register;
    __ vector_mask_operation(ideal_opc, result, $mask$$KRegister,
                             result, lane_count, mask_bytes, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23790
// VectorMaskToLong for a mask held in a vector register (the input is not
// a predicate-register mask). $dst is also passed as the helper's
// general-purpose temporary; $xtmp is a vector temporary.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    Register result = $dst$$Register;
    __ vector_mask_operation(ideal_opc, result, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             result, lane_count, mask_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23806
// VectorMaskToLong applied directly to the input of a VectorStoreMask, for
// masks that are not predicate-register masks. The $size immediate is
// matched but not used by the encoding.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    Register result = $dst$$Register;
    __ vector_mask_operation(ideal_opc, result, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             result, lane_count, mask_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23822
// VectorMaskTrueCount for a predicate-register mask. $tmp is the helper's
// general-purpose temporary; the mask size is lane count times element
// size in bytes.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int mask_bytes = lane_count * type2aelembytes(mask_bt);
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_count, mask_bytes, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23839
// VectorMaskTrueCount for a mask held in a vector register (the input is
// not a predicate-register mask). Uses one general-purpose and one vector
// temporary.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, mask_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23855
// VectorMaskTrueCount applied directly to the input of a VectorStoreMask,
// for masks that are not predicate-register masks. The $size immediate is
// matched but not used by the encoding.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, mask_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23871
// VectorMaskFirstTrue / VectorMaskLastTrue for a predicate-register mask.
// The ideal opcode tells the helper which of the two is wanted; the mask
// size is lane count times element size in bytes.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int mask_bytes = lane_count * type2aelembytes(mask_bt);
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_count, mask_bytes, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23889
// VectorMaskFirstTrue / VectorMaskLastTrue for a mask held in a vector
// register (the input is not a predicate-register mask). The ideal opcode
// tells the helper which of the two is wanted.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, mask_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23906
// VectorMaskFirstTrue / VectorMaskLastTrue applied directly to the input of
// a VectorStoreMask, for masks that are not predicate-register masks.
// The $size immediate is matched but not used by the encoding.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    const int ideal_opc  = this->ideal_Opcode();
    const int vec_enc    = vector_length_encoding(this, $mask);
    const int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, mask_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23923
23924 // --------------------------------- Compress/Expand Operations ---------------------------
// CompressV / ExpandV with a vector-register mask, used when AVX512VL is
// not available and the vector is at most 32 bytes. Delegates to
// vector_compress_expand_avx2; the ideal opcode selects compress vs expand.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int vec_enc   = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    Register gp_tmp     = $rtmp$$Register;
    Register gp_scratch = $rscratch$$Register;
    __ vector_compress_expand_avx2(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, gp_tmp,
                                   gp_scratch, $perm$$XMMRegister, $xtmp$$XMMRegister, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23940
// CompressV / ExpandV with a predicate-register mask, used when AVX512VL is
// available or the vector is 64 bytes. Delegates to vector_compress_expand
// with the merge flag set to false.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int vec_enc   = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_compress_expand(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23954
// CompressM on a predicate-register mask. Delegates to vector_mask_compress
// with two general-purpose temporaries and the lane count of this node.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
    const int lane_count = Matcher::vector_length(this);
    Register gp_tmp_a = $rtmp1$$Register;
    Register gp_tmp_b = $rtmp2$$Register;
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, gp_tmp_a, gp_tmp_b, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
23966
23967 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23968
// ReverseV when GFNI is not supported. Delegates to vector_reverse_bit with
// two vector temporaries and one general-purpose temporary.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    __ vector_reverse_bit(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vtmp_a,
                          vtmp_b, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23982
// ReverseV when GFNI is supported. The 64-bit constant 0x8040201008040201
// is placed in the constant table and its address passed to
// vector_reverse_bit_gfni together with one vector temporary.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    InternalAddress gfni_const = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, gfni_const, vlen_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23997
// ReverseBytesV when AVX512BW is supported or the vector is shorter than
// 64 bytes. Delegates to vector_reverse_byte; no extra temporaries needed.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24010
24011 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24012 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24013 match(Set dst (ReverseBytesV src));
24014 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24015 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24016 ins_encode %{
24017 int vec_enc = vector_length_encoding(this);
24018 BasicType bt = Matcher::vector_element_basic_type(this);
24019 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24020 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24021 %}
24022 ins_pipe( pipe_slow );
24023 %}
24024
24025 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24026
24027 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24028 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24029 Matcher::vector_length_in_bytes(n->in(1))));
24030 match(Set dst (CountLeadingZerosV src));
24031 format %{ "vector_count_leading_zeros $dst, $src" %}
24032 ins_encode %{
24033 int vlen_enc = vector_length_encoding(this, $src);
24034 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24035 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24036 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24037 %}
24038 ins_pipe( pipe_slow );
24039 %}
24040
24041 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24042 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24043 Matcher::vector_length_in_bytes(n->in(1))));
24044 match(Set dst (CountLeadingZerosV src mask));
24045 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24046 ins_encode %{
24047 int vlen_enc = vector_length_encoding(this, $src);
24048 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24049 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24050 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24051 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24052 %}
24053 ins_pipe( pipe_slow );
24054 %}
24055
24056 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24057 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24058 VM_Version::supports_avx512cd() &&
24059 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24060 match(Set dst (CountLeadingZerosV src));
24061 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24062 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24063 ins_encode %{
24064 int vlen_enc = vector_length_encoding(this, $src);
24065 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24066 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24067 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24068 %}
24069 ins_pipe( pipe_slow );
24070 %}
24071
24072 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24073 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24074 match(Set dst (CountLeadingZerosV src));
24075 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24076 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24077 ins_encode %{
24078 int vlen_enc = vector_length_encoding(this, $src);
24079 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24080 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24081 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24082 $rtmp$$Register, true, vlen_enc);
24083 %}
24084 ins_pipe( pipe_slow );
24085 %}
24086
24087 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24088 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24089 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24090 match(Set dst (CountLeadingZerosV src));
24091 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24092 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24093 ins_encode %{
24094 int vlen_enc = vector_length_encoding(this, $src);
24095 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24096 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24097 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24098 %}
24099 ins_pipe( pipe_slow );
24100 %}
24101
24102 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24103 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24104 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24105 match(Set dst (CountLeadingZerosV src));
24106 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24107 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24108 ins_encode %{
24109 int vlen_enc = vector_length_encoding(this, $src);
24110 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24111 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24112 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24113 %}
24114 ins_pipe( pipe_slow );
24115 %}
24116
24117 // ---------------------------------- Vector Masked Operations ------------------------------------
24118
24119 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24120 match(Set dst (AddVB (Binary dst src2) mask));
24121 match(Set dst (AddVS (Binary dst src2) mask));
24122 match(Set dst (AddVI (Binary dst src2) mask));
24123 match(Set dst (AddVL (Binary dst src2) mask));
24124 match(Set dst (AddVF (Binary dst src2) mask));
24125 match(Set dst (AddVD (Binary dst src2) mask));
24126 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24127 ins_encode %{
24128 int vlen_enc = vector_length_encoding(this);
24129 BasicType bt = Matcher::vector_element_basic_type(this);
24130 int opc = this->ideal_Opcode();
24131 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24132 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24133 %}
24134 ins_pipe( pipe_slow );
24135 %}
24136
24137 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24138 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24139 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24140 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24141 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24142 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24143 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24144 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24145 ins_encode %{
24146 int vlen_enc = vector_length_encoding(this);
24147 BasicType bt = Matcher::vector_element_basic_type(this);
24148 int opc = this->ideal_Opcode();
24149 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24150 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24151 %}
24152 ins_pipe( pipe_slow );
24153 %}
24154
24155 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24156 match(Set dst (XorV (Binary dst src2) mask));
24157 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24158 ins_encode %{
24159 int vlen_enc = vector_length_encoding(this);
24160 BasicType bt = Matcher::vector_element_basic_type(this);
24161 int opc = this->ideal_Opcode();
24162 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24163 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24164 %}
24165 ins_pipe( pipe_slow );
24166 %}
24167
24168 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24169 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24170 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24171 ins_encode %{
24172 int vlen_enc = vector_length_encoding(this);
24173 BasicType bt = Matcher::vector_element_basic_type(this);
24174 int opc = this->ideal_Opcode();
24175 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24176 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24177 %}
24178 ins_pipe( pipe_slow );
24179 %}
24180
24181 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24182 match(Set dst (OrV (Binary dst src2) mask));
24183 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24184 ins_encode %{
24185 int vlen_enc = vector_length_encoding(this);
24186 BasicType bt = Matcher::vector_element_basic_type(this);
24187 int opc = this->ideal_Opcode();
24188 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24189 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24190 %}
24191 ins_pipe( pipe_slow );
24192 %}
24193
24194 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24195 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24196 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24197 ins_encode %{
24198 int vlen_enc = vector_length_encoding(this);
24199 BasicType bt = Matcher::vector_element_basic_type(this);
24200 int opc = this->ideal_Opcode();
24201 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24202 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24203 %}
24204 ins_pipe( pipe_slow );
24205 %}
24206
24207 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24208 match(Set dst (AndV (Binary dst src2) mask));
24209 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24210 ins_encode %{
24211 int vlen_enc = vector_length_encoding(this);
24212 BasicType bt = Matcher::vector_element_basic_type(this);
24213 int opc = this->ideal_Opcode();
24214 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24215 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24216 %}
24217 ins_pipe( pipe_slow );
24218 %}
24219
24220 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24221 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24222 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24223 ins_encode %{
24224 int vlen_enc = vector_length_encoding(this);
24225 BasicType bt = Matcher::vector_element_basic_type(this);
24226 int opc = this->ideal_Opcode();
24227 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24228 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24229 %}
24230 ins_pipe( pipe_slow );
24231 %}
24232
24233 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24234 match(Set dst (SubVB (Binary dst src2) mask));
24235 match(Set dst (SubVS (Binary dst src2) mask));
24236 match(Set dst (SubVI (Binary dst src2) mask));
24237 match(Set dst (SubVL (Binary dst src2) mask));
24238 match(Set dst (SubVF (Binary dst src2) mask));
24239 match(Set dst (SubVD (Binary dst src2) mask));
24240 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24241 ins_encode %{
24242 int vlen_enc = vector_length_encoding(this);
24243 BasicType bt = Matcher::vector_element_basic_type(this);
24244 int opc = this->ideal_Opcode();
24245 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24246 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24247 %}
24248 ins_pipe( pipe_slow );
24249 %}
24250
24251 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24252 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24253 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24254 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24255 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24256 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24257 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24258 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24259 ins_encode %{
24260 int vlen_enc = vector_length_encoding(this);
24261 BasicType bt = Matcher::vector_element_basic_type(this);
24262 int opc = this->ideal_Opcode();
24263 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24264 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24265 %}
24266 ins_pipe( pipe_slow );
24267 %}
24268
24269 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24270 match(Set dst (MulVS (Binary dst src2) mask));
24271 match(Set dst (MulVI (Binary dst src2) mask));
24272 match(Set dst (MulVL (Binary dst src2) mask));
24273 match(Set dst (MulVF (Binary dst src2) mask));
24274 match(Set dst (MulVD (Binary dst src2) mask));
24275 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24276 ins_encode %{
24277 int vlen_enc = vector_length_encoding(this);
24278 BasicType bt = Matcher::vector_element_basic_type(this);
24279 int opc = this->ideal_Opcode();
24280 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24281 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24282 %}
24283 ins_pipe( pipe_slow );
24284 %}
24285
24286 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24287 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24288 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24289 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24290 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24291 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24292 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24293 ins_encode %{
24294 int vlen_enc = vector_length_encoding(this);
24295 BasicType bt = Matcher::vector_element_basic_type(this);
24296 int opc = this->ideal_Opcode();
24297 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24298 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24299 %}
24300 ins_pipe( pipe_slow );
24301 %}
24302
24303 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24304 match(Set dst (SqrtVF dst mask));
24305 match(Set dst (SqrtVD dst mask));
24306 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24307 ins_encode %{
24308 int vlen_enc = vector_length_encoding(this);
24309 BasicType bt = Matcher::vector_element_basic_type(this);
24310 int opc = this->ideal_Opcode();
24311 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24312 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24313 %}
24314 ins_pipe( pipe_slow );
24315 %}
24316
24317 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24318 match(Set dst (DivVF (Binary dst src2) mask));
24319 match(Set dst (DivVD (Binary dst src2) mask));
24320 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24321 ins_encode %{
24322 int vlen_enc = vector_length_encoding(this);
24323 BasicType bt = Matcher::vector_element_basic_type(this);
24324 int opc = this->ideal_Opcode();
24325 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24326 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24327 %}
24328 ins_pipe( pipe_slow );
24329 %}
24330
24331 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24332 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24333 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24334 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24335 ins_encode %{
24336 int vlen_enc = vector_length_encoding(this);
24337 BasicType bt = Matcher::vector_element_basic_type(this);
24338 int opc = this->ideal_Opcode();
24339 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24340 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24341 %}
24342 ins_pipe( pipe_slow );
24343 %}
24344
24345
24346 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24347 match(Set dst (RotateLeftV (Binary dst shift) mask));
24348 match(Set dst (RotateRightV (Binary dst shift) mask));
24349 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24350 ins_encode %{
24351 int vlen_enc = vector_length_encoding(this);
24352 BasicType bt = Matcher::vector_element_basic_type(this);
24353 int opc = this->ideal_Opcode();
24354 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24355 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24356 %}
24357 ins_pipe( pipe_slow );
24358 %}
24359
24360 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24361 match(Set dst (RotateLeftV (Binary dst src2) mask));
24362 match(Set dst (RotateRightV (Binary dst src2) mask));
24363 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24364 ins_encode %{
24365 int vlen_enc = vector_length_encoding(this);
24366 BasicType bt = Matcher::vector_element_basic_type(this);
24367 int opc = this->ideal_Opcode();
24368 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24369 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24370 %}
24371 ins_pipe( pipe_slow );
24372 %}
24373
24374 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24375 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24376 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24377 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24378 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24379 ins_encode %{
24380 int vlen_enc = vector_length_encoding(this);
24381 BasicType bt = Matcher::vector_element_basic_type(this);
24382 int opc = this->ideal_Opcode();
24383 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24384 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24385 %}
24386 ins_pipe( pipe_slow );
24387 %}
24388
24389 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24390 predicate(!n->as_ShiftV()->is_var_shift());
24391 match(Set dst (LShiftVS (Binary dst src2) mask));
24392 match(Set dst (LShiftVI (Binary dst src2) mask));
24393 match(Set dst (LShiftVL (Binary dst src2) mask));
24394 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24395 ins_encode %{
24396 int vlen_enc = vector_length_encoding(this);
24397 BasicType bt = Matcher::vector_element_basic_type(this);
24398 int opc = this->ideal_Opcode();
24399 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24400 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24401 %}
24402 ins_pipe( pipe_slow );
24403 %}
24404
24405 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24406 predicate(n->as_ShiftV()->is_var_shift());
24407 match(Set dst (LShiftVS (Binary dst src2) mask));
24408 match(Set dst (LShiftVI (Binary dst src2) mask));
24409 match(Set dst (LShiftVL (Binary dst src2) mask));
24410 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24411 ins_encode %{
24412 int vlen_enc = vector_length_encoding(this);
24413 BasicType bt = Matcher::vector_element_basic_type(this);
24414 int opc = this->ideal_Opcode();
24415 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24416 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24417 %}
24418 ins_pipe( pipe_slow );
24419 %}
24420
24421 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24422 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24423 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24424 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24425 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24426 ins_encode %{
24427 int vlen_enc = vector_length_encoding(this);
24428 BasicType bt = Matcher::vector_element_basic_type(this);
24429 int opc = this->ideal_Opcode();
24430 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24431 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24432 %}
24433 ins_pipe( pipe_slow );
24434 %}
24435
24436 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24437 predicate(!n->as_ShiftV()->is_var_shift());
24438 match(Set dst (RShiftVS (Binary dst src2) mask));
24439 match(Set dst (RShiftVI (Binary dst src2) mask));
24440 match(Set dst (RShiftVL (Binary dst src2) mask));
24441 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24442 ins_encode %{
24443 int vlen_enc = vector_length_encoding(this);
24444 BasicType bt = Matcher::vector_element_basic_type(this);
24445 int opc = this->ideal_Opcode();
24446 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24447 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24448 %}
24449 ins_pipe( pipe_slow );
24450 %}
24451
24452 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24453 predicate(n->as_ShiftV()->is_var_shift());
24454 match(Set dst (RShiftVS (Binary dst src2) mask));
24455 match(Set dst (RShiftVI (Binary dst src2) mask));
24456 match(Set dst (RShiftVL (Binary dst src2) mask));
24457 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24458 ins_encode %{
24459 int vlen_enc = vector_length_encoding(this);
24460 BasicType bt = Matcher::vector_element_basic_type(this);
24461 int opc = this->ideal_Opcode();
24462 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24463 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24464 %}
24465 ins_pipe( pipe_slow );
24466 %}
24467
24468 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24469 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24470 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24471 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24472 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24473 ins_encode %{
24474 int vlen_enc = vector_length_encoding(this);
24475 BasicType bt = Matcher::vector_element_basic_type(this);
24476 int opc = this->ideal_Opcode();
24477 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24478 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24479 %}
24480 ins_pipe( pipe_slow );
24481 %}
24482
24483 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24484 predicate(!n->as_ShiftV()->is_var_shift());
24485 match(Set dst (URShiftVS (Binary dst src2) mask));
24486 match(Set dst (URShiftVI (Binary dst src2) mask));
24487 match(Set dst (URShiftVL (Binary dst src2) mask));
24488 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24489 ins_encode %{
24490 int vlen_enc = vector_length_encoding(this);
24491 BasicType bt = Matcher::vector_element_basic_type(this);
24492 int opc = this->ideal_Opcode();
24493 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24494 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24495 %}
24496 ins_pipe( pipe_slow );
24497 %}
24498
24499 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24500 predicate(n->as_ShiftV()->is_var_shift());
24501 match(Set dst (URShiftVS (Binary dst src2) mask));
24502 match(Set dst (URShiftVI (Binary dst src2) mask));
24503 match(Set dst (URShiftVL (Binary dst src2) mask));
24504 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24505 ins_encode %{
24506 int vlen_enc = vector_length_encoding(this);
24507 BasicType bt = Matcher::vector_element_basic_type(this);
24508 int opc = this->ideal_Opcode();
24509 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24510 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24511 %}
24512 ins_pipe( pipe_slow );
24513 %}
24514
24515 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24516 match(Set dst (MaxV (Binary dst src2) mask));
24517 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24518 ins_encode %{
24519 int vlen_enc = vector_length_encoding(this);
24520 BasicType bt = Matcher::vector_element_basic_type(this);
24521 int opc = this->ideal_Opcode();
24522 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24523 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24524 %}
24525 ins_pipe( pipe_slow );
24526 %}
24527
24528 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24529 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24530 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24531 ins_encode %{
24532 int vlen_enc = vector_length_encoding(this);
24533 BasicType bt = Matcher::vector_element_basic_type(this);
24534 int opc = this->ideal_Opcode();
24535 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24536 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24537 %}
24538 ins_pipe( pipe_slow );
24539 %}
24540
24541 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24542 match(Set dst (MinV (Binary dst src2) mask));
24543 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24544 ins_encode %{
24545 int vlen_enc = vector_length_encoding(this);
24546 BasicType bt = Matcher::vector_element_basic_type(this);
24547 int opc = this->ideal_Opcode();
24548 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24549 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24550 %}
24551 ins_pipe( pipe_slow );
24552 %}
24553
24554 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24555 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24556 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24557 ins_encode %{
24558 int vlen_enc = vector_length_encoding(this);
24559 BasicType bt = Matcher::vector_element_basic_type(this);
24560 int opc = this->ideal_Opcode();
24561 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24562 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24563 %}
24564 ins_pipe( pipe_slow );
24565 %}
24566
24567 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24568 match(Set dst (VectorRearrange (Binary dst src2) mask));
24569 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24570 ins_encode %{
24571 int vlen_enc = vector_length_encoding(this);
24572 BasicType bt = Matcher::vector_element_basic_type(this);
24573 int opc = this->ideal_Opcode();
24574 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24575 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24576 %}
24577 ins_pipe( pipe_slow );
24578 %}
24579
24580 instruct vabs_masked(vec dst, kReg mask) %{
24581 match(Set dst (AbsVB dst mask));
24582 match(Set dst (AbsVS dst mask));
24583 match(Set dst (AbsVI dst mask));
24584 match(Set dst (AbsVL dst mask));
24585 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24586 ins_encode %{
24587 int vlen_enc = vector_length_encoding(this);
24588 BasicType bt = Matcher::vector_element_basic_type(this);
24589 int opc = this->ideal_Opcode();
24590 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24591 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24592 %}
24593 ins_pipe( pipe_slow );
24594 %}
24595
24596 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24597 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24598 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24599 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24600 ins_encode %{
24601 assert(UseFMA, "Needs FMA instructions support.");
24602 int vlen_enc = vector_length_encoding(this);
24603 BasicType bt = Matcher::vector_element_basic_type(this);
24604 int opc = this->ideal_Opcode();
24605 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24606 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24607 %}
24608 ins_pipe( pipe_slow );
24609 %}
24610
24611 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24612 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24613 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24614 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24615 ins_encode %{
24616 assert(UseFMA, "Needs FMA instructions support.");
24617 int vlen_enc = vector_length_encoding(this);
24618 BasicType bt = Matcher::vector_element_basic_type(this);
24619 int opc = this->ideal_Opcode();
24620 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24621 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24622 %}
24623 ins_pipe( pipe_slow );
24624 %}
24625
24626 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24627 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24628 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24629 ins_encode %{
24630 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
24631 int vlen_enc = vector_length_encoding(this, $src1);
24632 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24633
24634 // Comparison i
24635 switch (src1_elem_bt) {
24636 case T_BYTE: {
24637 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24638 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24639 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24640 break;
24641 }
24642 case T_SHORT: {
24643 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24644 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24645 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24646 break;
24647 }
24648 case T_INT: {
24649 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24650 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24651 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24652 break;
24653 }
24654 case T_LONG: {
24655 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24656 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24657 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24658 break;
24659 }
24660 case T_FLOAT: {
24661 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24662 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24663 break;
24664 }
24665 case T_DOUBLE: {
24666 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24667 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24668 break;
24669 }
24670 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24671 }
24672 %}
24673 ins_pipe( pipe_slow );
24674 %}
24675
24676 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24677 predicate(Matcher::vector_length(n) <= 32);
24678 match(Set dst (MaskAll src));
24679 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24680 ins_encode %{
24681 int mask_len = Matcher::vector_length(this);
24682 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24683 %}
24684 ins_pipe( pipe_slow );
24685 %}
24686
// Opmask NOT, expressed in the ideal graph as XorVMask with MaskAll(-1).
// Variant for fewer than 8 lanes on AVX512DQ hardware; it needs a general
// register and an opmask register as temporaries.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    KRegister result_mask  = $dst$$KRegister;
    KRegister input_mask   = $src$$KRegister;
    KRegister scratch_mask = $ktmp$$KRegister;
    Register  scratch_gpr  = $rtmp$$Register;
    __ knot(lane_count, result_mask, input_mask, scratch_mask, scratch_gpr);
  %}
  ins_pipe( pipe_slow );
%}
24698
// Opmask NOT, expressed in the ideal graph as XorVMask with MaskAll(-1).
// Selected for 8 lanes with AVX512DQ, for exactly 16 lanes, or for more than
// 16 lanes with AVX512BW; no temporaries are required in these cases.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    KRegister result_mask = $dst$$KRegister;
    KRegister input_mask  = $src$$KRegister;
    __ knot(lane_count, result_mask, input_mask);
  %}
  ins_pipe( pipe_slow );
%}
24711
// VectorLongToMask producing a vector-register mask (the node type is not a
// predicate-register mask) for at most 8 lanes. No vector temporary is
// supplied to the helper in this variant (xnoreg is passed).
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    // The encoding is derived from the lane count itself.
    const int enc = vector_length_encoding(lane_count);
    XMMRegister result  = $dst$$XMMRegister;
    Register    bits    = $src$$Register;
    Register    gpr_tmp1 = $rtmp1$$Register;
    Register    gpr_tmp2 = $rtmp2$$Register;
    __ vector_long_to_maskvec(result, bits, gpr_tmp1, gpr_tmp2, xnoreg, lane_count, enc);
  %}
  ins_pipe( pipe_slow );
%}
24725
24726
// VectorLongToMask producing a vector-register mask (the node type is not a
// predicate-register mask) for more than 8 lanes. Uses an extra vector
// temporary and kills the flags register.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    assert(lane_count <= 32, "invalid mask length");
    // The encoding is derived from the lane count itself.
    const int enc = vector_length_encoding(lane_count);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister vec_tmp  = $xtmp1$$XMMRegister;
    Register    bits     = $src$$Register;
    Register    gpr_tmp1 = $rtmp1$$Register;
    Register    gpr_tmp2 = $rtmp2$$Register;
    __ vector_long_to_maskvec(result, bits, gpr_tmp1, gpr_tmp2, vec_tmp, lane_count, enc);
  %}
  ins_pipe( pipe_slow );
%}
24741
// VectorLongToMask when the node type is a predicate-register mask:
// a single move from the general register into the opmask register.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    KRegister result_mask = $dst$$KRegister;
    Register  bits        = $src$$Register;
    __ kmov(result_mask, bits);
  %}
  ins_pipe( pipe_slow );
%}
24751
// AND / OR / XOR of two opmask registers. The ideal opcode of the matched
// node is forwarded to the macro-assembler, which picks the instruction.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    // Both inputs are expected to carry the same mask type (debug-only check).
    const MachNode* lhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* rhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(lhs_mask->bottom_type(), rhs_mask->bottom_type()), "Mask types must be equal");

    // Without AVX512DQ, lengths below 16 are issued as a 16-lane operation.
    uint op_len = Matcher::vector_length(this);
    if (op_len < 16 && !VM_Version::supports_avx512dq()) {
      op_len = 16;
    }
    __ masked_op(this->ideal_Opcode(), op_len, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24768
// Masked three-input MacroLogicV with all vector inputs in registers.
// dst is both the first input and the result; func is the 8-bit function
// immediate handed to evpternlog.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister acc    = $dst$$XMMRegister;
    XMMRegister second = $src2$$XMMRegister;
    XMMRegister third  = $src3$$XMMRegister;
    __ evpternlog(acc, $func$$constant, $mask$$KRegister,
                  second, third, true, elem_bt, enc);
  %}
  ins_pipe( pipe_slow );
%}
24780
// Masked three-input MacroLogicV whose third input is a memory operand.
// dst is both the first input and the result; func is the 8-bit function
// immediate handed to evpternlog.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister acc    = $dst$$XMMRegister;
    XMMRegister second = $src2$$XMMRegister;
    __ evpternlog(acc, $func$$constant, $mask$$KRegister,
                  second, $src3$$Address, true, elem_bt, enc);
  %}
  ins_pipe( pipe_slow );
%}
24792
// CastVV on an opmask register: emits no code (size 0, cost 0).
instruct castMM(kReg dst) %{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode( /* nothing to emit */ );
  ins_cost(0);
  ins_pipe(empty);
%}
24803
// CastVV on a vector register: emits no code (size 0, cost 0).
instruct castVV(vec dst) %{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode( /* nothing to emit */ );
  ins_cost(0);
  ins_pipe(empty);
%}
24814
// CastVV on a legVec-class vector register: emits no code (size 0, cost 0).
instruct castVVLeg(legVec dst) %{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode( /* nothing to emit */ );
  ins_cost(0);
  ins_pipe(empty);
%}
24825
// IsInfiniteF: classify the float with vfpclassss into an opmask temporary,
// then copy the mask into the integer result register.
// NOTE(review): immediate 0x18 is presumably the +Inf/-Inf category selector
// of VFPCLASS -- confirm against the instruction reference.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) %{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    KRegister   class_bits = $ktmp$$KRegister;
    XMMRegister value      = $src$$XMMRegister;
    __ vfpclassss(class_bits, value, 0x18);
    __ kmovbl($dst$$Register, class_bits);
  %}
  ins_pipe(pipe_slow);
%}
24837
// IsInfiniteD: classify the double with vfpclasssd into an opmask temporary,
// then copy the mask into the integer result register.
// NOTE(review): immediate 0x18 is presumably the +Inf/-Inf category selector
// of VFPCLASS -- confirm against the instruction reference.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) %{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    KRegister   class_bits = $ktmp$$KRegister;
    XMMRegister value      = $src$$XMMRegister;
    __ vfpclasssd(class_bits, value, 0x18);
    __ kmovbl($dst$$Register, class_bits);
  %}
  ins_pipe(pipe_slow);
%}
24849
// Signed saturating add/sub on subword element types, register operands.
// The ideal opcode (SaturatingAddV or SaturatingSubV) selects the operation.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // This rule covers the signed flavour only (see predicate).
    const bool unsigned_op = false;
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, unsigned_op, enc);
  %}
  ins_pipe(pipe_slow);
%}
24865
// Unsigned saturating add/sub on subword element types, register operands.
// The ideal opcode (SaturatingAddV or SaturatingSubV) selects the operation.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // This rule covers the unsigned flavour only (see predicate).
    const bool unsigned_op = true;
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, unsigned_op, enc);
  %}
  ins_pipe(pipe_slow);
%}
24881
// Signed saturating add/sub on non-subword element types, EVEX path.
// Selected for 64-byte vectors or whenever AVX512VL is available.
// Needs two vector and two opmask temporaries.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    KRegister   ktmp_a = $ktmp1$$KRegister;
    KRegister   ktmp_b = $ktmp2$$KRegister;
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        vtmp_a, vtmp_b, ktmp_a, ktmp_b, enc);
  %}
  ins_pipe(pipe_slow);
%}
24901
// Signed saturating add/sub on non-subword element types, AVX path.
// Selected for vectors of at most 32 bytes when AVX512VL is unavailable.
// Needs four vector temporaries.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    XMMRegister vtmp_c = $xtmp3$$XMMRegister;
    XMMRegister vtmp_d = $xtmp4$$XMMRegister;
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                                       $src1$$XMMRegister, $src2$$XMMRegister,
                                       vtmp_a, vtmp_b, vtmp_c, vtmp_d, enc);
  %}
  ins_pipe(pipe_slow);
%}
24920
// Unsigned saturating add on non-subword element types, EVEX path.
// Selected for 64-byte vectors or whenever AVX512VL is available.
// Needs two vector temporaries and one opmask temporary.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    __ vector_add_dq_saturating_unsigned_evex(lane_bt, $dst$$XMMRegister,
                                              $src1$$XMMRegister, $src2$$XMMRegister,
                                              vtmp_a, vtmp_b, $ktmp$$KRegister, enc);
  %}
  ins_pipe(pipe_slow);
%}
24937
// Unsigned saturating add on non-subword element types, AVX path.
// Selected for vectors of at most 32 bytes when AVX512VL is unavailable.
// Needs three vector temporaries.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    XMMRegister vtmp_c = $xtmp3$$XMMRegister;
    __ vector_add_dq_saturating_unsigned_avx(lane_bt, $dst$$XMMRegister,
                                             $src1$$XMMRegister, $src2$$XMMRegister,
                                             vtmp_a, vtmp_b, vtmp_c, enc);
  %}
  ins_pipe(pipe_slow);
%}
24954
// Unsigned saturating subtract on non-subword element types, EVEX path.
// Selected for 64-byte vectors or whenever AVX512VL is available.
// Needs a single opmask temporary.
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    KRegister scratch_mask = $ktmp$$KRegister;
    __ vector_sub_dq_saturating_unsigned_evex(lane_bt, $dst$$XMMRegister,
                                              $src1$$XMMRegister, $src2$$XMMRegister,
                                              scratch_mask, enc);
  %}
  ins_pipe(pipe_slow);
%}
24971
// Unsigned saturating subtract on non-subword element types, AVX path.
// Selected for vectors of at most 32 bytes when AVX512VL is unavailable.
// Needs two vector temporaries.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister vtmp_a = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b = $xtmp2$$XMMRegister;
    __ vector_sub_dq_saturating_unsigned_avx(lane_bt, $dst$$XMMRegister,
                                             $src1$$XMMRegister, $src2$$XMMRegister,
                                             vtmp_a, vtmp_b, enc);
  %}
  ins_pipe(pipe_slow);
%}
24988
// Signed saturating add/sub on subword element types where the second
// operand is loaded from memory (LoadVector folded into the instruction).
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // This rule covers the signed flavour only (see predicate).
    const bool unsigned_op = false;
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, unsigned_op, enc);
  %}
  ins_pipe(pipe_slow);
%}
25004
// Unsigned saturating add/sub on subword element types where the second
// operand is loaded from memory (LoadVector folded into the instruction).
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // This rule covers the unsigned flavour only (see predicate).
    const bool unsigned_op = true;
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, unsigned_op, enc);
  %}
  ins_pipe(pipe_slow);
%}
25020
// Masked signed saturating add/sub on subword element types, register
// operands. dst serves as both the first input and the result.
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    // Signed flavour (see predicate).
    const bool unsigned_op = false;
    // NOTE(review): the trailing 'true' is presumably the merge-masking flag -- confirm.
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister, acc,
                              acc, $src$$XMMRegister, unsigned_op, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
25035
// Masked unsigned saturating add/sub on subword element types, register
// operands. dst serves as both the first input and the result.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    // Unsigned flavour (see predicate).
    const bool unsigned_op = true;
    // NOTE(review): the trailing 'true' is presumably the merge-masking flag -- confirm.
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister, acc,
                              acc, $src$$XMMRegister, unsigned_op, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
25050
// Masked signed saturating add/sub on subword element types with the second
// operand loaded from memory. dst is both the first input and the result.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    // Signed flavour (see predicate).
    const bool unsigned_op = false;
    // NOTE(review): the trailing 'true' is presumably the merge-masking flag -- confirm.
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister, acc,
                              acc, $src$$Address, unsigned_op, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
25065
// Masked unsigned saturating add/sub on subword element types with the second
// operand loaded from memory. dst is both the first input and the result.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    // Unsigned flavour (see predicate).
    const bool unsigned_op = true;
    // NOTE(review): the trailing 'true' is presumably the merge-masking flag -- confirm.
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister, acc,
                              acc, $src$$Address, unsigned_op, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
25080
// SelectFromTwoVector: the index vector register is also the destination,
// so the result overwrites the indices.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) %{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    XMMRegister idx_and_result = $index$$XMMRegister;
    XMMRegister first_table    = $src1$$XMMRegister;
    XMMRegister second_table   = $src2$$XMMRegister;
    __ select_from_two_vectors_evex(lane_bt, idx_and_result, first_table, second_table, enc);
  %}
  ins_pipe(pipe_slow);
%}
25092
// ReinterpretS2HF: move the value from a general register into a float
// register with evmovw.
instruct reinterpretS2HF(regF dst, rRegI src) %{
  match(Set dst (ReinterpretS2HF src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    XMMRegister fp_target = $dst$$XMMRegister;
    Register    int_source = $src$$Register;
    __ evmovw(fp_target, int_source);
  %}
  ins_pipe(pipe_slow);
%}
25102
// ReinterpretHF2S: move the value from a float register into a general
// register with evmovw, then narrow the result register as T_SHORT.
instruct reinterpretHF2S(rRegI dst, regF src) %{
  match(Set dst (ReinterpretHF2S src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    Register    int_target = $dst$$Register;
    XMMRegister fp_source  = $src$$XMMRegister;
    __ evmovw(int_target, fp_source);
    __ narrow_subword_type(int_target, T_SHORT);
  %}
  ins_pipe(pipe_slow);
%}
25113
// Fuses ConvF2HF followed by ReinterpretS2HF into a single vcvtps2ph that
// stays in the float register file.
instruct convF2HFAndS2HF(regF dst, regF src) %{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    XMMRegister half_out  = $dst$$XMMRegister;
    XMMRegister single_in = $src$$XMMRegister;
    __ vcvtps2ph(half_out, single_in, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25123
// Fuses ReinterpretHF2S followed by ConvHF2F into a single vcvtph2ps that
// stays in the float register file.
instruct convHF2SAndHF2F(regF dst, regF src) %{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    XMMRegister single_out = $dst$$XMMRegister;
    XMMRegister half_in    = $src$$XMMRegister;
    __ vcvtph2ps(single_out, half_in, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25133
// Scalar SqrtHF via vsqrtsh.
instruct scalar_sqrt_HF_reg(regF dst, regF src) %{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    XMMRegister root  = $dst$$XMMRegister;
    XMMRegister value = $src$$XMMRegister;
    __ vsqrtsh(root, value);
  %}
  ins_pipe(pipe_slow);
%}
25143
// Scalar AddHF / DivHF / MulHF / SubHF. The ideal opcode of the matched node
// is passed to efp16sh, which selects the concrete instruction.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ efp16sh(ideal_op, result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
25157
// Scalar MaxHF / MinHF when AVX10.2 is supported. No temporaries are needed;
// k0 is passed as the mask register argument.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ sminmax_fp16_avx10_2(ideal_op, result, lhs, rhs, k0);
  %}
  ins_pipe( pipe_slow );
%}
25171
// Scalar MaxHF / MinHF fallback for hardware without AVX10.2. Requires one
// opmask and two float-register temporaries.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    KRegister   scratch_mask = $ktmp$$KRegister;
    XMMRegister vtmp_a       = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b       = $xtmp2$$XMMRegister;
    __ sminmax_fp16(ideal_op, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                    scratch_mask, vtmp_a, vtmp_b);
  %}
  ins_pipe( pipe_slow );
%}
25187
// Scalar FmaHF: dst = dst * src1 + src2 (per the format string), emitted as
// vfmadd132sh with dst as the accumulating register.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc        = $dst$$XMMRegister;
    XMMRegister addend     = $src2$$XMMRegister;
    XMMRegister multiplier = $src1$$XMMRegister;
    __ vfmadd132sh(acc, addend, multiplier);
  %}
  ins_pipe( pipe_slow );
%}
25198
25199
// Vector SqrtVHF with a register source, via evsqrtph.
instruct vector_sqrt_HF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister roots  = $dst$$XMMRegister;
    XMMRegister values = $src$$XMMRegister;
    __ evsqrtph(roots, values, enc);
  %}
  ins_pipe(pipe_slow);
%}
25210
// Vector SqrtVHF whose input is a reinterpreted vector load, folded into
// the memory form of evsqrtph.
instruct vector_sqrt_HF_mem(vec dst, memory src) %{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister roots = $dst$$XMMRegister;
    __ evsqrtph(roots, $src$$Address, enc);
  %}
  ins_pipe(pipe_slow);
%}
25221
// Vector AddVHF / DivVHF / MulVHF / SubVHF with register operands. The ideal
// opcode is passed to evfp16ph, which selects the concrete instruction.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ evfp16ph(ideal_op, result, lhs, rhs, enc);
  %}
  ins_pipe(pipe_slow);
%}
25236
25237
// Vector AddVHF / DivVHF / MulVHF / SubVHF whose second operand is a
// reinterpreted vector load, folded into the memory form of the instruction.
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2) %{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ evfp16ph(ideal_op, result, lhs, $src2$$Address, enc);
  %}
  ins_pipe(pipe_slow);
%}
25252
// Vector FmaVHF with register operands: dst = dst * src1 + src2 (per the
// format string), emitted as evfmadd132ph with dst as the accumulator.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister acc        = $dst$$XMMRegister;
    XMMRegister addend     = $src2$$XMMRegister;
    XMMRegister multiplier = $src1$$XMMRegister;
    __ evfmadd132ph(acc, addend, multiplier, enc);
  %}
  ins_pipe( pipe_slow );
%}
25263
// Vector FmaVHF whose multiplier (src1) is a reinterpreted vector load:
// dst = dst * src1 + src2 (per the format string), emitted as the memory
// form of evfmadd132ph.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2) %{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister acc    = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    __ evfmadd132ph(acc, addend, $src1$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
25274
// Vector MinVHF / MaxVHF on AVX10.2 hardware with the second operand taken
// from a reinterpreted vector load. k0 is passed as the mask register.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vminmax_fp16_avx10_2(ideal_op, result, lhs, $src2$$Address, k0, enc);
  %}
  ins_pipe( pipe_slow );
%}
25289
// Vector MinVHF / MaxVHF on AVX10.2 hardware with register operands.
// k0 is passed as the mask register.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vminmax_fp16_avx10_2(ideal_op, result, lhs, rhs, k0, enc);
  %}
  ins_pipe( pipe_slow );
%}
25304
// Vector MinVHF / MaxVHF fallback for hardware without AVX10.2. Requires one
// opmask and two vector temporaries.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    KRegister   scratch_mask = $ktmp$$KRegister;
    XMMRegister vtmp_a       = $xtmp1$$XMMRegister;
    XMMRegister vtmp_b       = $xtmp2$$XMMRegister;
    // Note the operand order: src2 is handed to the helper before src1.
    __ vminmax_fp16(ideal_op, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister,
                    scratch_mask, vtmp_a, vtmp_b, enc);
  %}
  ins_pipe( pipe_slow );
%}
25320
25321 //----------PEEPHOLE RULES-----------------------------------------------------
25322 // These must follow all instruction definitions as they use the names
25323 // defined in the instructions definitions.
25324 //
25325 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate holds
25327 //
25328 // peepmatch ( root_instr_name [preceding_instruction]* );
25329 //
25330 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file. The method has the
// // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, a function that, when invoked, returns a new
// // node of the kind defined in peepreplace, and the rules of the nodes appearing
// // in the corresponding peepmatch. The function returns true if successful,
// // otherwise it returns false
25339 //
25340 // peepconstraint %{
25341 // (instruction_number.operand_name relational_op instruction_number.operand_name
25342 // [, ...] );
25343 // // instruction numbers are zero-based using left to right order in peepmatch
25344 //
25345 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25346 // // provide an instruction_number.operand_name for each operand that appears
25347 // // in the replacement instruction's match rule
25348 //
25349 // ---------VM FLAGS---------------------------------------------------------
25350 //
25351 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25352 //
25353 // Each peephole rule is given an identifying number starting with zero and
25354 // increasing by one in the order seen by the parser. An individual peephole
25355 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25356 // on the command-line.
25357 //
25358 // ---------CURRENT LIMITATIONS----------------------------------------------
25359 //
// Only transformations inside a basic block (do we need more for peephole?)
25361 //
25362 // ---------EXAMPLE----------------------------------------------------------
25363 //
25364 // // pertinent parts of existing instructions in architecture description
25365 // instruct movI(rRegI dst, rRegI src)
25366 // %{
25367 // match(Set dst (CopyI src));
25368 // %}
25369 //
25370 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25371 // %{
25372 // match(Set dst (AddI dst src));
25373 // effect(KILL cr);
25374 // %}
25375 //
25376 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25377 // %{
25378 // match(Set dst (AddI dst src));
25379 // %}
25380 //
25381 // 1. Simple replacement
25382 // - Only match adjacent instructions in same basic block
25383 // - Only equality constraints
25384 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25385 // - Only one replacement instruction
25386 //
25387 // // Change (inc mov) to lea
25388 // peephole %{
25389 // // lea should only be emitted when beneficial
25390 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25391 // // increment preceded by register-register move
25392 // peepmatch ( incI_rReg movI );
25393 // // require that the destination register of the increment
25394 // // match the destination register of the move
25395 // peepconstraint ( 0.dst == 1.dst );
25396 // // construct a replacement instruction that sets
25397 // // the destination to ( move's source register + one )
25398 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25399 // %}
25400 //
25401 // 2. Procedural replacement
// - More flexible finding of relevant nodes
25403 // - More flexible constraints
25404 // - More flexible transformations
25405 // - May utilise architecture-dependent API more effectively
25406 // - Currently only one replacement instruction due to adlc parsing capabilities
25407 //
25408 // // Change (inc mov) to lea
25409 // peephole %{
25410 // // lea should only be emitted when beneficial
25411 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25412 // // the rule numbers of these nodes inside are passed into the function below
25413 // peepmatch ( incI_rReg movI );
25414 // // the method that takes the responsibility of transformation
25415 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that creates this node when
// // invoked is passed into the function above
25418 // peepreplace ( leaI_rReg_immI() );
25419 // %}
25420
25421 // These instructions are not matched by the matcher but are used by the peephole rules
// 32-bit register + register add emitted as a single lea:
//   leal dst, [src1 + src2]
// predicate(false) means the matcher never selects this rule on its own; it is
// named as the peepreplace target of a peephole rule later in this file.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));

  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 == rbp || src1 == r13) {
      // src1 must not sit in the base slot, so exchange the two addends; with
      // a scale of times_1 the computed sum is the same either way.
      // NOTE(review): presumably rbp/r13 are avoided as base because that
      // addressing form needs an explicit displacement - confirm.
      // The assert requires that src2 is not rbp/r13 as well.
      assert(src2 != rbp && src2 != r13, "");
      __ leal($dst$$Register, Address(src2, src1, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(src1, src2, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25440
// 32-bit register + immediate add emitted as a single lea:
//   leal dst, [src1 + src2]
// predicate(false) keeps the matcher from selecting this rule; it is only
// named as the peepreplace target of peephole rules later in this file.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));

  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    // The immediate becomes the displacement of a base-only address.
    Register result = $dst$$Register;
    Address base_plus_disp = Address($src1$$Register, $src2$$constant);
    __ leal(result, base_plus_disp);
  %}
  ins_pipe(ialu_reg_reg);
%}
25451
// 32-bit left shift by a small immediate emitted as a single lea:
//   leal dst, [src << shift]
// predicate(false) keeps the matcher from selecting this rule; it is only
// named as the peepreplace target of a peephole rule later in this file.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));

  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register src = $src$$Register;
    // The shift count is reused directly as the address scale factor.
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    if (scale != Address::times_2 || src == rbp || src == r13) {
      // General form: no base register, src as the scaled index.
      __ leal(result, Address(noreg, src, scale));
    } else {
      // src * 2 is written as src + src; this form is skipped when src is
      // rbp or r13, which would otherwise end up in the base slot.
      __ leal(result, Address(src, src, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25468
// 64-bit register + register add emitted as a single lea:
//   leaq dst, [src1 + src2]
// predicate(false) means the matcher never selects this rule on its own; it is
// named as the peepreplace target of a peephole rule later in this file.
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));

  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 == rbp || src1 == r13) {
      // src1 must not sit in the base slot, so exchange the two addends; with
      // a scale of times_1 the computed sum is the same either way.
      // NOTE(review): presumably rbp/r13 are avoided as base because that
      // addressing form needs an explicit displacement - confirm.
      // The assert requires that src2 is not rbp/r13 as well.
      assert(src2 != rbp && src2 != r13, "");
      __ leaq($dst$$Register, Address(src2, src1, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(src1, src2, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25487
// 64-bit register + 32-bit immediate add emitted as a single lea:
//   leaq dst, [src1 + src2]
// predicate(false) keeps the matcher from selecting this rule; it is only
// named as the peepreplace target of peephole rules later in this file.
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));

  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    // The immediate becomes the displacement of a base-only address.
    Register result = $dst$$Register;
    Address base_plus_disp = Address($src1$$Register, $src2$$constant);
    __ leaq(result, base_plus_disp);
  %}
  ins_pipe(ialu_reg_reg);
%}
25498
// 64-bit left shift by a small immediate emitted as a single lea:
//   leaq dst, [src << shift]
// predicate(false) keeps the matcher from selecting this rule; it is only
// named as the peepreplace target of a peephole rule later in this file.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));

  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register src = $src$$Register;
    // The shift count is reused directly as the address scale factor.
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    if (scale != Address::times_2 || src == rbp || src == r13) {
      // General form: no base register, src as the scaled index.
      __ leaq(result, Address(noreg, src, scale));
    } else {
      // src * 2 is written as src + src; this form is skipped when src is
      // rbp or r13, which would otherwise end up in the base slot.
      __ leaq(result, Address(src, src, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25515
25516 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25517 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25518 // processors with at least partial ALU support for lea
25519 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25520 // beneficial for processors with full ALU support
25521 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25522
// int register + register add: anchors on addI_rReg and hands the rewrite to
// lea_coalesce_reg, with leaI_rReg_rReg_peep (leal dst, [src1 + src2]) as the
// replacement node. Gated on VM_Version::supports_fast_2op_lea().
25523 peephole
25524 %{
25525 peeppredicate(VM_Version::supports_fast_2op_lea());
25526 peepmatch (addI_rReg);
25527 peepprocedure (lea_coalesce_reg);
25528 peepreplace (leaI_rReg_rReg_peep());
25529 %}
25530
// int register + immediate add: anchors on addI_rReg_imm and hands the rewrite
// to lea_coalesce_imm, with leaI_rReg_immI_peep (leal dst, [src1 + src2]) as
// the replacement node. Gated on VM_Version::supports_fast_2op_lea().
25531 peephole
25532 %{
25533 peeppredicate(VM_Version::supports_fast_2op_lea());
25534 peepmatch (addI_rReg_imm);
25535 peepprocedure (lea_coalesce_imm);
25536 peepreplace (leaI_rReg_immI_peep());
25537 %}
25538
// int increment: anchors on incI_rReg and hands the rewrite to
// lea_coalesce_imm, with leaI_rReg_immI_peep as the replacement node.
// Unlike the add rules this one requires supports_fast_3op_lea() or an Intel
// Cascade Lake CPU.
25539 peephole
25540 %{
25541 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25542 VM_Version::is_intel_cascade_lake());
25543 peepmatch (incI_rReg);
25544 peepprocedure (lea_coalesce_imm);
25545 peepreplace (leaI_rReg_immI_peep());
25546 %}
25547
// int decrement: anchors on decI_rReg and hands the rewrite to
// lea_coalesce_imm, with leaI_rReg_immI_peep as the replacement node.
// Unlike the add rules this one requires supports_fast_3op_lea() or an Intel
// Cascade Lake CPU.
25548 peephole
25549 %{
25550 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25551 VM_Version::is_intel_cascade_lake());
25552 peepmatch (decI_rReg);
25553 peepprocedure (lea_coalesce_imm);
25554 peepreplace (leaI_rReg_immI_peep());
25555 %}
25556
// int shift-left by an immI2 count: anchors on salI_rReg_immI2 and hands the
// rewrite to lea_coalesce_imm, with leaI_rReg_immI2_peep
// (leal dst, [src << shift]) as the replacement node.
// Gated on VM_Version::supports_fast_2op_lea().
25557 peephole
25558 %{
25559 peeppredicate(VM_Version::supports_fast_2op_lea());
25560 peepmatch (salI_rReg_immI2);
25561 peepprocedure (lea_coalesce_imm);
25562 peepreplace (leaI_rReg_immI2_peep());
25563 %}
25564
// long register + register add: anchors on addL_rReg and hands the rewrite to
// lea_coalesce_reg, with leaL_rReg_rReg_peep (leaq dst, [src1 + src2]) as the
// replacement node. Gated on VM_Version::supports_fast_2op_lea().
25565 peephole
25566 %{
25567 peeppredicate(VM_Version::supports_fast_2op_lea());
25568 peepmatch (addL_rReg);
25569 peepprocedure (lea_coalesce_reg);
25570 peepreplace (leaL_rReg_rReg_peep());
25571 %}
25572
// long register + immediate add: anchors on addL_rReg_imm and hands the
// rewrite to lea_coalesce_imm, with leaL_rReg_immL32_peep
// (leaq dst, [src1 + src2]) as the replacement node.
// Gated on VM_Version::supports_fast_2op_lea().
25573 peephole
25574 %{
25575 peeppredicate(VM_Version::supports_fast_2op_lea());
25576 peepmatch (addL_rReg_imm);
25577 peepprocedure (lea_coalesce_imm);
25578 peepreplace (leaL_rReg_immL32_peep());
25579 %}
25580
// long increment: anchors on incL_rReg and hands the rewrite to
// lea_coalesce_imm, with leaL_rReg_immL32_peep as the replacement node.
// Unlike the add rules this one requires supports_fast_3op_lea() or an Intel
// Cascade Lake CPU.
25581 peephole
25582 %{
25583 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25584 VM_Version::is_intel_cascade_lake());
25585 peepmatch (incL_rReg);
25586 peepprocedure (lea_coalesce_imm);
25587 peepreplace (leaL_rReg_immL32_peep());
25588 %}
25589
// long decrement: anchors on decL_rReg and hands the rewrite to
// lea_coalesce_imm, with leaL_rReg_immL32_peep as the replacement node.
// Unlike the add rules this one requires supports_fast_3op_lea() or an Intel
// Cascade Lake CPU.
25590 peephole
25591 %{
25592 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25593 VM_Version::is_intel_cascade_lake());
25594 peepmatch (decL_rReg);
25595 peepprocedure (lea_coalesce_imm);
25596 peepreplace (leaL_rReg_immL32_peep());
25597 %}
25598
// long shift-left by an immI2 count: anchors on salL_rReg_immI2 and hands the
// rewrite to lea_coalesce_imm, with leaL_rReg_immI2_peep
// (leaq dst, [src << shift]) as the replacement node.
// Gated on VM_Version::supports_fast_2op_lea().
25599 peephole
25600 %{
25601 peeppredicate(VM_Version::supports_fast_2op_lea());
25602 peepmatch (salL_rReg_immI2);
25603 peepprocedure (lea_coalesce_imm);
25604 peepreplace (leaL_rReg_immI2_peep());
25605 %}
25606
// Runs lea_remove_redundant on every matched leaPCompressedOopOffset node.
// The rule declares neither a peeppredicate nor a peepreplace.
// NOTE(review): both names are defined outside this section - confirm what
// exactly is removed against the procedure's implementation.
25607 peephole
25608 %{
25609 peepmatch (leaPCompressedOopOffset);
25610 peepprocedure (lea_remove_redundant);
25611 %}
25612
// Runs lea_remove_redundant on every matched leaP8Narrow node.
// The rule declares neither a peeppredicate nor a peepreplace.
// NOTE(review): both names are defined outside this section - confirm what
// exactly is removed against the procedure's implementation.
25613 peephole
25614 %{
25615 peepmatch (leaP8Narrow);
25616 peepprocedure (lea_remove_redundant);
25617 %}
25618
// Runs lea_remove_redundant on every matched leaP32Narrow node.
// The rule declares neither a peeppredicate nor a peepreplace.
// NOTE(review): both names are defined outside this section - confirm what
// exactly is removed against the procedure's implementation.
25619 peephole
25620 %{
25621 peepmatch (leaP32Narrow);
25622 peepprocedure (lea_remove_redundant);
25623 %}
25624
25625 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
25626 // The test instruction is redundant when the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25627
25628 //int variant
// Anchors on testI_reg and leaves the decision to test_may_remove.
// The rule declares neither a peeppredicate nor a peepreplace.
25629 peephole
25630 %{
25631 peepmatch (testI_reg);
25632 peepprocedure (test_may_remove);
25633 %}
25634
25635 //long variant
// Anchors on testL_reg and leaves the decision to test_may_remove.
// The rule declares neither a peeppredicate nor a peepreplace.
25636 peephole
25637 %{
25638 peepmatch (testL_reg);
25639 peepprocedure (test_may_remove);
25640 %}
25641
25642
25643 //----------SMARTSPILL RULES---------------------------------------------------
25644 // These must follow all instruction definitions as they use the names
25645 // defined in the instructions definitions.