1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def"  name ( register save type, C convention save type,
36 //                   ideal register type, encoding, vm register );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
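//
// For example (illustration only, restating the RAX definition below):
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// declares RAX as save-on-call under both the Java and C calling conventions,
// spilled and restored as an integer (Op_RegI), with hardware encoding 0, and
// bound to the VMReg returned by rax->as_VMReg().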
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // Previously RBX, RSI, and RDI were set as save-on-entry for java code.
67 // SOE was then turned off in java code due to frequent use of uncommon-traps.
68 // Now that the allocator is better, RSI and RDI are turned back on as SOE registers.
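//
// Note: each 64-bit general register is described as a pair of 32-bit halves
// (e.g. RAX and RAX_H, the latter bound to the following VMReg slot via
// ->next()), because the allocator tracks registers in 32-bit slots; long and
// pointer values occupy both halves.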
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
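// chunk0 below lists the general registers in allocation priority order,
// per the guidance above: plain scratch registers such as R10 and R11 are
// tried first, while RSP comes last.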
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers.  512-bit registers, 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words a and b hold a Double.
224 // The whole registers are used by SSE4.2 intrinsics, array copy stubs
225 // and superword operations (see the UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 //      XMM8-XMM15 must be encoded with REX (VEX/EVEX for UseAVX); XMM16-XMM31 require EVEX encoding.
231 //
232 //   Linux ABI:   No XMM registers are preserved across function calls
233 //                XMM0-XMM7 might hold parameters
234 //   Windows ABI: XMM6-XMM15 are preserved across function calls
235 //                XMM0-XMM3 might hold parameters
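//
// For example, word a of XMM0 holds a single Float, XMM0/XMM0b together hold
// a Double, and XMM0 through XMM0p cover the full 512-bit (ZMM0) register
// used for wide vector operations.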
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
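// Note: k0 is deliberately not listed; in EVEX encodings a writemask of k0
// means "no masking", so k0 cannot be used as an allocatable predicate
// register.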
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
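// These classes are consumed by the operand definitions later in this file.
// A minimal sketch of that usage (illustrative only, not an additional
// definition):
//
//   operand rRegP() %{
//     constraint(ALLOC_IN_RC(ptr_reg));  // allocate from the ptr_reg class
//     match(RegP);
//     format %{ %}
//     interface(REG_INTER);
//   %}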
812
813 // Empty register class.
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
851 reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
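// Note: the masks returned by these dynamic classes (_ANY_REG_mask,
// _PTR_REG_mask, and friends) are populated at VM startup rather than being
// fixed lists, so registers such as the APX extended GPRs R16-R31 can be
// left out when the corresponding CPU features and flags are not enabled.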
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
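// A reg_class_dynamic selects its first (EVEX) class when the runtime predicate holds and
// falls back to the legacy class otherwise; XMM16-XMM31 can only be encoded with EVEX, so
// they are only handed to the allocator on AVX-512 capable hardware.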
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
1226 // Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
1244 // Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
1281 // Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
1299 // Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
1336 // Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
1354 // Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
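// castLL_is_imm32: true when each bound of the CastLL's long type is either unbounded
// (min_jlong/max_jlong) or fits in a signed 32-bit immediate, so the bound can be encoded
// directly as an imm32 in the generated compares.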
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
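// With compressed oops, r12 holds the narrow-oop heap base and must be kept out of the
// allocatable register set (see reg_mask_init below).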
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
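// rsp and r15 (the current JavaThread) are never allocatable. The extended GPRs r16-r31
// additionally require Intel APX; without UseAPX they are stripped from the GPR masks.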
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
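// The int register classes cover only the low 32-bit slot of each GPR, so unlike the
// pointer/long masks above only the first VMReg slot needs to be removed here.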
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is 3 bytes
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
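// The 15 bytes below account for the 10-byte movq that loads the inline cache value into
// rax plus the 5-byte call rel32 emitted for a dynamic Java call.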
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1683 }
1684
1685 // This could be in MacroAssembler but it's fairly C2 specific
1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1687 Label exit;
1688 __ jccb(Assembler::noParity, exit);
1689 __ pushf();
1690 //
1691 // comiss/ucomiss instructions set ZF,PF,CF flags and
1692 // zero OF,AF,SF for NaN values.
1693 // Fixup flags by zeroing ZF,PF so that compare of NaN
1694 // values returns 'less than' result (CF is set).
1695 // Leave the rest of flags unchanged.
1696 //
1697 // 7 6 5 4 3 2 1 0
1698 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1699 // 0 0 1 0 1 0 1 1 (0x2B)
1700 //
1701 __ andq(Address(rsp, 0), 0xffffff2b);
1702 __ popf();
1703 __ bind(exit);
1704 }
1705
1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1707 // The floating point compare (ucomiss/ucomisd) has already been emitted by the caller; CF=1 covers
1708 // both 'below' and the unordered (NaN) case, so dst keeps the -1 set below ('less than or unordered').
1709 Label done;
1710 __ movl(dst, -1);
1711 __ jcc(Assembler::below, done);
1712 __ setcc(Assembler::notEqual, dst);
1713 __ bind(done);
1714 }
1715
1716 // Math.min() # Math.max()
1717 // --------------------------
1718 // ucomis[s/d] #
1719 // ja -> b # a
1720 // jp -> NaN # NaN
1721 // jb -> a # b
1722 // je #
1723 // |-jz -> a | b # a & b
1724 // | -> a #
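// The equal case needs the extra zero check because +0.0 and -0.0 compare equal:
// min ORs the operands so a negative zero wins, max ANDs them so a positive zero wins,
// and an unordered compare (NaN input) produces the canonical NaN constant.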
1725 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1726 XMMRegister a, XMMRegister b,
1727 XMMRegister xmmt, Register rt,
1728 bool min, bool single) {
1729
1730 Label nan, zero, below, above, done;
1731
1732 if (single)
1733 __ ucomiss(a, b);
1734 else
1735 __ ucomisd(a, b);
1736
1737 if (dst->encoding() != (min ? b : a)->encoding())
1738 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1739 else
1740 __ jccb(Assembler::above, done);
1741
1742 __ jccb(Assembler::parity, nan); // PF=1
1743 __ jccb(Assembler::below, below); // CF=1
1744
1745 // equal
1746 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1747 if (single) {
1748 __ ucomiss(a, xmmt);
1749 __ jccb(Assembler::equal, zero);
1750
1751 __ movflt(dst, a);
1752 __ jmp(done);
1753 }
1754 else {
1755 __ ucomisd(a, xmmt);
1756 __ jccb(Assembler::equal, zero);
1757
1758 __ movdbl(dst, a);
1759 __ jmp(done);
1760 }
1761
1762 __ bind(zero);
1763 if (min)
1764 __ vpor(dst, a, b, Assembler::AVX_128bit);
1765 else
1766 __ vpand(dst, a, b, Assembler::AVX_128bit);
1767
1768 __ jmp(done);
1769
1770 __ bind(above);
1771 if (single)
1772 __ movflt(dst, min ? b : a);
1773 else
1774 __ movdbl(dst, min ? b : a);
1775
1776 __ jmp(done);
1777
1778 __ bind(nan);
1779 if (single) {
1780 __ movl(rt, 0x7fc00000); // Float.NaN
1781 __ movdl(dst, rt);
1782 }
1783 else {
1784 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1785 __ movdq(dst, rt);
1786 }
1787 __ jmp(done);
1788
1789 __ bind(below);
1790 if (single)
1791 __ movflt(dst, min ? a : b);
1792 else
1793 __ movdbl(dst, min ? a : b);
1794
1795 __ bind(done);
1796 }
1797
1798 //=============================================================================
1799 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1800
1801 int ConstantTable::calculate_table_base_offset() const {
1802 return 0; // absolute addressing, no offset
1803 }
1804
1805 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1806 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1807 ShouldNotReachHere();
1808 }
1809
1810 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1811 // Empty encoding
1812 }
1813
1814 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1815 return 0;
1816 }
1817
1818 #ifndef PRODUCT
1819 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1820 st->print("# MachConstantBaseNode (empty encoding)");
1821 }
1822 #endif
1823
1824
1825 //=============================================================================
1826 #ifndef PRODUCT
1827 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1828 Compile* C = ra_->C;
1829
1830 int framesize = C->output()->frame_size_in_bytes();
1831 int bangsize = C->output()->bang_size_in_bytes();
1832 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1833 // Remove wordSize for return addr which is already pushed.
1834 framesize -= wordSize;
1835
1836 if (C->output()->need_stack_bang(bangsize)) {
1837 framesize -= wordSize;
1838 st->print("# stack bang (%d bytes)", bangsize);
1839 st->print("\n\t");
1840 st->print("pushq rbp\t# Save rbp");
1841 if (PreserveFramePointer) {
1842 st->print("\n\t");
1843 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1844 }
1845 if (framesize) {
1846 st->print("\n\t");
1847 st->print("subq rsp, #%d\t# Create frame",framesize);
1848 }
1849 } else {
1850 st->print("subq rsp, #%d\t# Create frame",framesize);
1851 st->print("\n\t");
1852 framesize -= wordSize;
1853 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1854 if (PreserveFramePointer) {
1855 st->print("\n\t");
1856 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1857 if (framesize > 0) {
1858 st->print("\n\t");
1859 st->print("addq rbp, #%d", framesize);
1860 }
1861 }
1862 }
1863
1864 if (VerifyStackAtCalls) {
1865 st->print("\n\t");
1866 framesize -= wordSize;
1867 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1868 #ifdef ASSERT
1869 st->print("\n\t");
1870 st->print("# stack alignment check");
1871 #endif
1872 }
1873 if (C->stub_function() != nullptr) {
1874 st->print("\n\t");
1875 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1876 st->print("\n\t");
1877 st->print("je fast_entry\t");
1878 st->print("\n\t");
1879 st->print("call #nmethod_entry_barrier_stub\t");
1880 st->print("\n\tfast_entry:");
1881 }
1882 st->cr();
1883 }
1884 #endif
1885
1886 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1887 Compile* C = ra_->C;
1888
1889 __ verified_entry(C);
1890
1891 if (ra_->C->stub_function() == nullptr) {
1892 __ entry_barrier();
1893 }
1894
1895 if (!Compile::current()->output()->in_scratch_emit_size()) {
1896 __ bind(*_verified_entry);
1897 }
1898
1899 C->output()->set_frame_complete(__ offset());
1900
1901 if (C->has_mach_constant_base_node()) {
1902 // NOTE: We set the table base offset here because users of the constant
1903 // table might be emitted before MachConstantBaseNode.
1904 ConstantTable& constant_table = C->output()->constant_table();
1905 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1906 }
1907 }
1908
1909
1910 int MachPrologNode::reloc() const
1911 {
1912 return 0; // a large enough number
1913 }
1914
1915 //=============================================================================
1916 #ifndef PRODUCT
1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1918 {
1919 Compile* C = ra_->C;
1920 if (generate_vzeroupper(C)) {
1921 st->print("vzeroupper");
1922 st->cr(); st->print("\t");
1923 }
1924
1925 int framesize = C->output()->frame_size_in_bytes();
1926 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1927 // Remove word for return adr already pushed
1928 // and RBP
1929 framesize -= 2*wordSize;
1930
1931 if (framesize) {
1932 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1933 st->print("\t");
1934 }
1935
1936 st->print_cr("popq rbp");
1937 if (do_polling() && C->is_method_compilation()) {
1938 st->print("\t");
1939 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1940 "ja #safepoint_stub\t"
1941 "# Safepoint: poll for GC");
1942 }
1943 }
1944 #endif
1945
1946 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1947 {
1948 Compile* C = ra_->C;
1949
1950 if (generate_vzeroupper(C)) {
1951 // Clear upper bits of YMM registers when current compiled code uses
1952 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1953 __ vzeroupper();
1954 }
1955
1956 // Subtract two words to account for return address and rbp
1957 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1958 __ remove_frame(initial_framesize, C->needs_stack_repair());
1959
1960 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1961 __ reserved_stack_check();
1962 }
1963
1964 if (do_polling() && C->is_method_compilation()) {
1965 Label dummy_label;
1966 Label* code_stub = &dummy_label;
1967 if (!C->output()->in_scratch_emit_size()) {
1968 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1969 C->output()->add_stub(stub);
1970 code_stub = &stub->entry();
1971 }
1972 __ relocate(relocInfo::poll_return_type);
1973 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1974 }
1975 }
1976
1977 int MachEpilogNode::reloc() const
1978 {
1979 return 2; // a large enough number
1980 }
1981
1982 const Pipeline* MachEpilogNode::pipeline() const
1983 {
1984 return MachNode::pipeline_class();
1985 }
1986
1987 //=============================================================================
1988
1989 enum RC {
1990 rc_bad,
1991 rc_int,
1992 rc_kreg,
1993 rc_float,
1994 rc_stack
1995 };
1996
1997 static enum RC rc_class(OptoReg::Name reg)
1998 {
1999 if( !OptoReg::is_valid(reg) ) return rc_bad;
2000
2001 if (OptoReg::is_stack(reg)) return rc_stack;
2002
2003 VMReg r = OptoReg::as_VMReg(reg);
2004
2005 if (r->is_Register()) return rc_int;
2006
2007 if (r->is_KRegister()) return rc_kreg;
2008
2009 assert(r->is_XMMRegister(), "must be");
2010 return rc_float;
2011 }
2012
2013 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2014 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2015 int src_hi, int dst_hi, uint ireg, outputStream* st);
2016
2017 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2018 int stack_offset, int reg, uint ireg, outputStream* st);
2019
2020 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2021 int dst_offset, uint ireg, outputStream* st) {
2022 if (masm) {
2023 switch (ireg) {
2024 case Op_VecS:
2025 __ movq(Address(rsp, -8), rax);
2026 __ movl(rax, Address(rsp, src_offset));
2027 __ movl(Address(rsp, dst_offset), rax);
2028 __ movq(rax, Address(rsp, -8));
2029 break;
2030 case Op_VecD:
2031 __ pushq(Address(rsp, src_offset));
2032 __ popq (Address(rsp, dst_offset));
2033 break;
2034 case Op_VecX:
2035 __ pushq(Address(rsp, src_offset));
2036 __ popq (Address(rsp, dst_offset));
2037 __ pushq(Address(rsp, src_offset+8));
2038 __ popq (Address(rsp, dst_offset+8));
2039 break;
2040 case Op_VecY:
2041 __ vmovdqu(Address(rsp, -32), xmm0);
2042 __ vmovdqu(xmm0, Address(rsp, src_offset));
2043 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2044 __ vmovdqu(xmm0, Address(rsp, -32));
2045 break;
2046 case Op_VecZ:
2047 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2048 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2049 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2050 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2051 break;
2052 default:
2053 ShouldNotReachHere();
2054 }
2055 #ifndef PRODUCT
2056 } else {
2057 switch (ireg) {
2058 case Op_VecS:
2059 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2060 "movl rax, [rsp + #%d]\n\t"
2061 "movl [rsp + #%d], rax\n\t"
2062 "movq rax, [rsp - #8]",
2063 src_offset, dst_offset);
2064 break;
2065 case Op_VecD:
2066 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2067 "popq [rsp + #%d]",
2068 src_offset, dst_offset);
2069 break;
2070 case Op_VecX:
2071 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2072 "popq [rsp + #%d]\n\t"
2073 "pushq [rsp + #%d]\n\t"
2074 "popq [rsp + #%d]",
2075 src_offset, dst_offset, src_offset+8, dst_offset+8);
2076 break;
2077 case Op_VecY:
2078 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2079 "vmovdqu xmm0, [rsp + #%d]\n\t"
2080 "vmovdqu [rsp + #%d], xmm0\n\t"
2081 "vmovdqu xmm0, [rsp - #32]",
2082 src_offset, dst_offset);
2083 break;
2084 case Op_VecZ:
2085 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2086 "vmovdqu xmm0, [rsp + #%d]\n\t"
2087 "vmovdqu [rsp + #%d], xmm0\n\t"
2088 "vmovdqu xmm0, [rsp - #64]",
2089 src_offset, dst_offset);
2090 break;
2091 default:
2092 ShouldNotReachHere();
2093 }
2094 #endif
2095 }
2096 }
2097
2098 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2099 PhaseRegAlloc* ra_,
2100 bool do_size,
2101 outputStream* st) const {
2102 assert(masm != nullptr || st != nullptr, "sanity");
2103 // Get registers to move
2104 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2105 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2106 OptoReg::Name dst_second = ra_->get_reg_second(this);
2107 OptoReg::Name dst_first = ra_->get_reg_first(this);
2108
2109 enum RC src_second_rc = rc_class(src_second);
2110 enum RC src_first_rc = rc_class(src_first);
2111 enum RC dst_second_rc = rc_class(dst_second);
2112 enum RC dst_first_rc = rc_class(dst_first);
2113
2114 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2115 "must move at least 1 register" );
2116
2117 if (src_first == dst_first && src_second == dst_second) {
2118 // Self copy, no move
2119 return 0;
2120 }
2121 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2122 uint ireg = ideal_reg();
2123 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2124 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
2125 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
2126 // mem -> mem
2127 int src_offset = ra_->reg2offset(src_first);
2128 int dst_offset = ra_->reg2offset(dst_first);
2129 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2130 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2131 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2132 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2133 int stack_offset = ra_->reg2offset(dst_first);
2134 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2135 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2136 int stack_offset = ra_->reg2offset(src_first);
2137 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2138 } else {
2139 ShouldNotReachHere();
2140 }
2141 return 0;
2142 }
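// Scalar copies from here on. An even first register immediately followed by its mate denotes
// an aligned adjacent slot pair, i.e. a full 64-bit value; any other combination is moved as 32 bits.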
2143 if (src_first_rc == rc_stack) {
2144 // mem ->
2145 if (dst_first_rc == rc_stack) {
2146 // mem -> mem
2147 assert(src_second != dst_first, "overlap");
2148 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2149 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2150 // 64-bit
2151 int src_offset = ra_->reg2offset(src_first);
2152 int dst_offset = ra_->reg2offset(dst_first);
2153 if (masm) {
2154 __ pushq(Address(rsp, src_offset));
2155 __ popq (Address(rsp, dst_offset));
2156 #ifndef PRODUCT
2157 } else {
2158 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2159 "popq [rsp + #%d]",
2160 src_offset, dst_offset);
2161 #endif
2162 }
2163 } else {
2164 // 32-bit
2165 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2166 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2167 // No pushl/popl, so:
2168 int src_offset = ra_->reg2offset(src_first);
2169 int dst_offset = ra_->reg2offset(dst_first);
2170 if (masm) {
2171 __ movq(Address(rsp, -8), rax);
2172 __ movl(rax, Address(rsp, src_offset));
2173 __ movl(Address(rsp, dst_offset), rax);
2174 __ movq(rax, Address(rsp, -8));
2175 #ifndef PRODUCT
2176 } else {
2177 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2178 "movl rax, [rsp + #%d]\n\t"
2179 "movl [rsp + #%d], rax\n\t"
2180 "movq rax, [rsp - #8]",
2181 src_offset, dst_offset);
2182 #endif
2183 }
2184 }
2185 return 0;
2186 } else if (dst_first_rc == rc_int) {
2187 // mem -> gpr
2188 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2189 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2190 // 64-bit
2191 int offset = ra_->reg2offset(src_first);
2192 if (masm) {
2193 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2194 #ifndef PRODUCT
2195 } else {
2196 st->print("movq %s, [rsp + #%d]\t# spill",
2197 Matcher::regName[dst_first],
2198 offset);
2199 #endif
2200 }
2201 } else {
2202 // 32-bit
2203 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2204 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2205 int offset = ra_->reg2offset(src_first);
2206 if (masm) {
2207 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2208 #ifndef PRODUCT
2209 } else {
2210 st->print("movl %s, [rsp + #%d]\t# spill",
2211 Matcher::regName[dst_first],
2212 offset);
2213 #endif
2214 }
2215 }
2216 return 0;
2217 } else if (dst_first_rc == rc_float) {
2218 // mem-> xmm
2219 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2220 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2221 // 64-bit
2222 int offset = ra_->reg2offset(src_first);
2223 if (masm) {
2224 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2225 #ifndef PRODUCT
2226 } else {
2227 st->print("%s %s, [rsp + #%d]\t# spill",
2228 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2229 Matcher::regName[dst_first],
2230 offset);
2231 #endif
2232 }
2233 } else {
2234 // 32-bit
2235 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2236 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2237 int offset = ra_->reg2offset(src_first);
2238 if (masm) {
2239 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2240 #ifndef PRODUCT
2241 } else {
2242 st->print("movss %s, [rsp + #%d]\t# spill",
2243 Matcher::regName[dst_first],
2244 offset);
2245 #endif
2246 }
2247 }
2248 return 0;
2249 } else if (dst_first_rc == rc_kreg) {
2250 // mem -> kreg
2251 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2252 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2253 // 64-bit
2254 int offset = ra_->reg2offset(src_first);
2255 if (masm) {
2256 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2257 #ifndef PRODUCT
2258 } else {
2259 st->print("kmovq %s, [rsp + #%d]\t# spill",
2260 Matcher::regName[dst_first],
2261 offset);
2262 #endif
2263 }
2264 }
2265 return 0;
2266 }
2267 } else if (src_first_rc == rc_int) {
2268 // gpr ->
2269 if (dst_first_rc == rc_stack) {
2270 // gpr -> mem
2271 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2272 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2273 // 64-bit
2274 int offset = ra_->reg2offset(dst_first);
2275 if (masm) {
2276 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2277 #ifndef PRODUCT
2278 } else {
2279 st->print("movq [rsp + #%d], %s\t# spill",
2280 offset,
2281 Matcher::regName[src_first]);
2282 #endif
2283 }
2284 } else {
2285 // 32-bit
2286 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2287 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2288 int offset = ra_->reg2offset(dst_first);
2289 if (masm) {
2290 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2291 #ifndef PRODUCT
2292 } else {
2293 st->print("movl [rsp + #%d], %s\t# spill",
2294 offset,
2295 Matcher::regName[src_first]);
2296 #endif
2297 }
2298 }
2299 return 0;
2300 } else if (dst_first_rc == rc_int) {
2301 // gpr -> gpr
2302 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2303 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2304 // 64-bit
2305 if (masm) {
2306 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2307 as_Register(Matcher::_regEncode[src_first]));
2308 #ifndef PRODUCT
2309 } else {
2310 st->print("movq %s, %s\t# spill",
2311 Matcher::regName[dst_first],
2312 Matcher::regName[src_first]);
2313 #endif
2314 }
2315 return 0;
2316 } else {
2317 // 32-bit
2318 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2319 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2320 if (masm) {
2321 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2322 as_Register(Matcher::_regEncode[src_first]));
2323 #ifndef PRODUCT
2324 } else {
2325 st->print("movl %s, %s\t# spill",
2326 Matcher::regName[dst_first],
2327 Matcher::regName[src_first]);
2328 #endif
2329 }
2330 return 0;
2331 }
2332 } else if (dst_first_rc == rc_float) {
2333 // gpr -> xmm
2334 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2335 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2336 // 64-bit
2337 if (masm) {
2338 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2339 #ifndef PRODUCT
2340 } else {
2341 st->print("movdq %s, %s\t# spill",
2342 Matcher::regName[dst_first],
2343 Matcher::regName[src_first]);
2344 #endif
2345 }
2346 } else {
2347 // 32-bit
2348 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2349 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2350 if (masm) {
2351 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2352 #ifndef PRODUCT
2353 } else {
2354 st->print("movdl %s, %s\t# spill",
2355 Matcher::regName[dst_first],
2356 Matcher::regName[src_first]);
2357 #endif
2358 }
2359 }
2360 return 0;
2361 } else if (dst_first_rc == rc_kreg) {
2362 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2363 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2364 // 64-bit
2365 if (masm) {
2366 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2367 #ifndef PRODUCT
2368 } else {
2369 st->print("kmovq %s, %s\t# spill",
2370 Matcher::regName[dst_first],
2371 Matcher::regName[src_first]);
2372 #endif
2373 }
2374 }
2375 Unimplemented();
2376 return 0;
2377 }
2378 } else if (src_first_rc == rc_float) {
2379 // xmm ->
2380 if (dst_first_rc == rc_stack) {
2381 // xmm -> mem
2382 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2383 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2384 // 64-bit
2385 int offset = ra_->reg2offset(dst_first);
2386 if (masm) {
2387 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2388 #ifndef PRODUCT
2389 } else {
2390 st->print("movsd [rsp + #%d], %s\t# spill",
2391 offset,
2392 Matcher::regName[src_first]);
2393 #endif
2394 }
2395 } else {
2396 // 32-bit
2397 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2398 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2399 int offset = ra_->reg2offset(dst_first);
2400 if (masm) {
2401 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2402 #ifndef PRODUCT
2403 } else {
2404 st->print("movss [rsp + #%d], %s\t# spill",
2405 offset,
2406 Matcher::regName[src_first]);
2407 #endif
2408 }
2409 }
2410 return 0;
2411 } else if (dst_first_rc == rc_int) {
2412 // xmm -> gpr
2413 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2414 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2415 // 64-bit
2416 if (masm) {
2417 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2418 #ifndef PRODUCT
2419 } else {
2420 st->print("movdq %s, %s\t# spill",
2421 Matcher::regName[dst_first],
2422 Matcher::regName[src_first]);
2423 #endif
2424 }
2425 } else {
2426 // 32-bit
2427 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2428 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2429 if (masm) {
2430 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2431 #ifndef PRODUCT
2432 } else {
2433 st->print("movdl %s, %s\t# spill",
2434 Matcher::regName[dst_first],
2435 Matcher::regName[src_first]);
2436 #endif
2437 }
2438 }
2439 return 0;
2440 } else if (dst_first_rc == rc_float) {
2441 // xmm -> xmm
2442 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2443 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2444 // 64-bit
2445 if (masm) {
2446 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2447 #ifndef PRODUCT
2448 } else {
2449 st->print("%s %s, %s\t# spill",
2450 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2451 Matcher::regName[dst_first],
2452 Matcher::regName[src_first]);
2453 #endif
2454 }
2455 } else {
2456 // 32-bit
2457 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2458 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2459 if (masm) {
2460 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2461 #ifndef PRODUCT
2462 } else {
2463 st->print("%s %s, %s\t# spill",
2464 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2465 Matcher::regName[dst_first],
2466 Matcher::regName[src_first]);
2467 #endif
2468 }
2469 }
2470 return 0;
2471 } else if (dst_first_rc == rc_kreg) {
2472 assert(false, "Illegal spilling");
2473 return 0;
2474 }
2475 } else if (src_first_rc == rc_kreg) {
2476 if (dst_first_rc == rc_stack) {
2477 // kreg -> mem
2478 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2479 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2480 // 64-bit
2481 int offset = ra_->reg2offset(dst_first);
2482 if (masm) {
2483 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2484 #ifndef PRODUCT
2485 } else {
2486 st->print("kmovq [rsp + #%d] , %s\t# spill",
2487 offset,
2488 Matcher::regName[src_first]);
2489 #endif
2490 }
2491 }
2492 return 0;
2493 } else if (dst_first_rc == rc_int) {
2494 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2495 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2496 // 64-bit
2497 if (masm) {
2498 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2499 #ifndef PRODUCT
2500 } else {
2501 st->print("kmovq %s, %s\t# spill",
2502 Matcher::regName[dst_first],
2503 Matcher::regName[src_first]);
2504 #endif
2505 }
2506 }
2507 Unimplemented();
2508 return 0;
2509 } else if (dst_first_rc == rc_kreg) {
2510 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2511 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2512 // 64-bit
2513 if (masm) {
2514 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2515 #ifndef PRODUCT
2516 } else {
2517 st->print("kmovq %s, %s\t# spill",
2518 Matcher::regName[dst_first],
2519 Matcher::regName[src_first]);
2520 #endif
2521 }
2522 }
2523 return 0;
2524 } else if (dst_first_rc == rc_float) {
2525 assert(false, "Illegal spill");
2526 return 0;
2527 }
2528 }
2529
2530 assert(false, "unhandled spill copy combination");
2531 Unimplemented();
2532 return 0;
2533 }
2534
2535 #ifndef PRODUCT
2536 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2537 implementation(nullptr, ra_, false, st);
2538 }
2539 #endif
2540
2541 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2542 implementation(masm, ra_, false, nullptr);
2543 }
2544
2545 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2546 return MachNode::size(ra_);
2547 }
2548
2549 //=============================================================================
2550 #ifndef PRODUCT
2551 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2552 {
2553 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2554 int reg = ra_->get_reg_first(this);
2555 st->print("leaq %s, [rsp + #%d]\t# box lock",
2556 Matcher::regName[reg], offset);
2557 }
2558 #endif
2559
2560 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2561 {
2562 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2563 int reg = ra_->get_encode(this);
2564
2565 __ lea(as_Register(reg), Address(rsp, offset));
2566 }
2567
2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2569 {
2570 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
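// leaq reg, [rsp + #offset] encodes as prefix + opcode + ModRM + SIB + disp: a REX prefix
// gives 5 bytes with disp8 and 8 with disp32; an r16+ destination needs the 2-byte REX2
// prefix instead, adding one byte to each form.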
2571 if (ra_->get_encode(this) > 15) {
2572 return (offset < 0x80) ? 6 : 9; // REX2
2573 } else {
2574 return (offset < 0x80) ? 5 : 8; // REX
2575 }
2576 }
2577
2578 //=============================================================================
2579 #ifndef PRODUCT
2580 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2581 {
2582 st->print_cr("MachVEPNode");
2583 }
2584 #endif
2585
2586 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2587 {
2588 CodeBuffer* cbuf = masm->code();
2589 uint insts_size = cbuf->insts_size();
2590 if (!_verified) {
2591 __ ic_check(1);
2592 } else {
2593 // TODO 8284443 Avoid creation of temporary frame
2594 if (ra_->C->stub_function() == nullptr) {
2595 __ verified_entry(ra_->C, 0);
2596 __ entry_barrier();
2597 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2598 __ remove_frame(initial_framesize, false);
2599 }
2600 // Unpack inline type args passed as oop and then jump to
2601 // the verified entry point (skipping the unverified entry).
2602 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2603 // Emit code for verified entry and save increment for stack repair on return
2604 __ verified_entry(ra_->C, sp_inc);
2605 if (Compile::current()->output()->in_scratch_emit_size()) {
2606 Label dummy_verified_entry;
2607 __ jmp(dummy_verified_entry);
2608 } else {
2609 __ jmp(*_verified_entry);
2610 }
2611 }
2612 /* WARNING: these NOPs are critical so that the verified entry point is properly
2613 4-byte aligned for patching by NativeJump::patch_verified_entry() */
2614 int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
2615 nops_cnt &= 0x3; // Do not add nops if code is aligned.
2616 if (nops_cnt > 0) {
2617 __ nop(nops_cnt);
2618 }
2619 }
2620
2621 //=============================================================================
2622 #ifndef PRODUCT
2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2624 {
2625 if (UseCompressedClassPointers) {
2626 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2627 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2628 } else {
2629 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2630 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2631 }
2632 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2633 }
2634 #endif
2635
2636 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2637 {
2638 __ ic_check(InteriorEntryAlignment);
2639 }
2640
2641
2642 //=============================================================================
2643
2644 bool Matcher::supports_vector_calling_convention(void) {
2645 return EnableVectorSupport;
2646 }
2647
2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2649 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2650 }
2651
2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2653 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2654 }
2655
2656 #ifdef ASSERT
2657 static bool is_ndd_demotable(const MachNode* mdef) {
2658 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2659 }
2660 #endif
2661
2662 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2663 int oper_index) {
2664 if (mdef == nullptr) {
2665 return false;
2666 }
2667
2668 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2669 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2670 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2671 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2672 return false;
2673 }
2674
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
2678 if (mdef->operand_num_edges(oper_index) != 1) {
2679 return false;
2680 }
2681
2682 // Demotion candidate must be register mask compatible with definition.
2683 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2684 if (!oper_mask.overlap(mdef->out_RegMask())) {
2685 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2686 return false;
2687 }
2688
2689 switch (oper_index) {
    // The first operand of a MachNode matched by an Intel APX NDD selection
    // pattern can share its assigned register with the definition operand if
    // their live ranges do not overlap. In such a scenario we can demote the
    // instruction to a legacy map0/map1 encoding by replacing its 4-byte
    // extended EVEX prefix with a shorter REX/REX2 prefix. Demotion candidates
    // are decorated with a special flag by the instruction selector.
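    // e.g. an NDD add 'dst = src1 + src2' can be emitted in the shorter legacy
    // two-operand form 'dst += src2' once the allocator assigns dst and src1
    // the same register.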
2696 case 1:
2697 return is_ndd_demotable_opr1(mdef);
2698
    // The definition operand of a commutative operation can be biased towards
    // the second operand.
2701 case 2:
2702 return is_ndd_demotable_opr2(mdef);
2703
    // The current scheme only selects up to two biasing candidates.
2705 default:
2706 assert(false, "unhandled operand index: %s", mdef->Name());
2707 break;
2708 }
2709
2710 return false;
2711 }
2712
2713 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2714 assert(EnableVectorSupport, "sanity");
2715 int lo = XMM0_num;
2716 int hi = XMM0b_num;
2717 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2718 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2719 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2720 return OptoRegPair(hi, lo);
2721 }
2722
2723 // Is this branch offset short enough that a short branch can be used?
2724 //
2725 // NOTE: If the platform does not provide any short branch variants, then
2726 // this method should return false for offset 0.
2727 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
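  // e.g. a 2-byte short jump to the immediately following instruction is
  // passed offset == br_size, giving a displacement of 0 after the adjustment.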
2731 offset -= br_size;
2732
  // The short version of jmpConUCF2 contains multiple branches,
  // which slightly reduces the reach.
2735 if (rule == jmpConUCF2_rule)
2736 return (-126 <= offset && offset <= 125);
2737 return (-128 <= offset && offset <= 127);
2738 }
2739
2740 #ifdef ASSERT
2741 // Return whether or not this register is ever used as an argument.
2742 bool Matcher::can_be_java_arg(int reg)
2743 {
2744 return
2745 reg == RDI_num || reg == RDI_H_num ||
2746 reg == RSI_num || reg == RSI_H_num ||
2747 reg == RDX_num || reg == RDX_H_num ||
2748 reg == RCX_num || reg == RCX_H_num ||
2749 reg == R8_num || reg == R8_H_num ||
2750 reg == R9_num || reg == R9_H_num ||
2751 reg == R12_num || reg == R12_H_num ||
2752 reg == XMM0_num || reg == XMM0b_num ||
2753 reg == XMM1_num || reg == XMM1b_num ||
2754 reg == XMM2_num || reg == XMM2b_num ||
2755 reg == XMM3_num || reg == XMM3b_num ||
2756 reg == XMM4_num || reg == XMM4b_num ||
2757 reg == XMM5_num || reg == XMM5b_num ||
2758 reg == XMM6_num || reg == XMM6b_num ||
2759 reg == XMM7_num || reg == XMM7b_num;
2760 }
2761 #endif
2762
2763 uint Matcher::int_pressure_limit()
2764 {
2765 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2766 }
2767
2768 uint Matcher::float_pressure_limit()
2769 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
2772 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2773 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2774 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2775 }
2776
2777 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the divisor is a constant
  // is faster than the hardware DIV instruction (it uses MulHiL).
2781 return false;
2782 }
2783
2784 // Register for DIVI projection of divmodI
2785 const RegMask& Matcher::divI_proj_mask() {
2786 return INT_RAX_REG_mask();
2787 }
2788
2789 // Register for MODI projection of divmodI
2790 const RegMask& Matcher::modI_proj_mask() {
2791 return INT_RDX_REG_mask();
2792 }
2793
2794 // Register for DIVL projection of divmodL
2795 const RegMask& Matcher::divL_proj_mask() {
2796 return LONG_RAX_REG_mask();
2797 }
2798
2799 // Register for MODL projection of divmodL
2800 const RegMask& Matcher::modL_proj_mask() {
2801 return LONG_RDX_REG_mask();
2802 }
2803
2804 %}
2805
2806 source_hpp %{
2807 // Header information of the source block.
2808 // Method declarations/definitions which are used outside
2809 // the ad-scope can conveniently be defined here.
2810 //
2811 // To keep related declarations/definitions/uses close together,
2812 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2813
2814 #include "runtime/vm_version.hpp"
2815
2816 class NativeJump;
2817
2818 class CallStubImpl {
2819
2820 //--------------------------------------------------------------
2821 //---< Used for optimization in Compile::shorten_branches >---
2822 //--------------------------------------------------------------
2823
2824 public:
2825 // Size of call trampoline stub.
2826 static uint size_call_trampoline() {
2827 return 0; // no call trampolines on this platform
2828 }
2829
2830 // number of relocations needed by a call trampoline stub
2831 static uint reloc_call_trampoline() {
2832 return 0; // no call trampolines on this platform
2833 }
2834 };
2835
2836 class HandlerImpl {
2837
2838 public:
2839
2840 static int emit_deopt_handler(C2_MacroAssembler* masm);
2841
2842 static uint size_deopt_handler() {
2843 // one call and one jmp.
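    // (a 5-byte call rel32 plus a 2-byte short backward jmp, assuming the
    // deopt blob is rel32-reachable from the code cache)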
2844 return 7;
2845 }
2846 };
2847
2848 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2849 switch(bytes) {
2850 case 4: // fall-through
2851 case 8: // fall-through
2852 case 16: return Assembler::AVX_128bit;
2853 case 32: return Assembler::AVX_256bit;
2854 case 64: return Assembler::AVX_512bit;
2855
2856 default: {
2857 ShouldNotReachHere();
2858 return Assembler::AVX_NoVec;
2859 }
2860 }
2861 }
2862
2863 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2864 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2865 }
2866
2867 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2868 uint def_idx = use->operand_index(opnd);
2869 Node* def = use->in(def_idx);
2870 return vector_length_encoding(def);
2871 }
2872
2873 static inline bool is_vector_popcount_predicate(BasicType bt) {
2874 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2875 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2876 }
2877
2878 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2879 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2880 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2881 }
2882
2883 class Node::PD {
2884 public:
2885 enum NodeFlags : uint64_t {
2886 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2887 Flag_sets_carry_flag = Node::_last_flag << 2,
2888 Flag_sets_parity_flag = Node::_last_flag << 3,
2889 Flag_sets_zero_flag = Node::_last_flag << 4,
2890 Flag_sets_overflow_flag = Node::_last_flag << 5,
2891 Flag_sets_sign_flag = Node::_last_flag << 6,
2892 Flag_clears_carry_flag = Node::_last_flag << 7,
2893 Flag_clears_parity_flag = Node::_last_flag << 8,
2894 Flag_clears_zero_flag = Node::_last_flag << 9,
2895 Flag_clears_overflow_flag = Node::_last_flag << 10,
2896 Flag_clears_sign_flag = Node::_last_flag << 11,
2897 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2898 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2899 _last_flag = Flag_ndd_demotable_opr2
2900 };
2901 };
2902
2903 %} // end source_hpp
2904
2905 source %{
2906
2907 #include "opto/addnode.hpp"
2908 #include "c2_intelJccErratum_x86.hpp"
2909
2910 void PhaseOutput::pd_perform_mach_node_analysis() {
2911 if (VM_Version::has_intel_jcc_erratum()) {
2912 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2913 _buf_sizes._code += extra_padding;
2914 }
2915 }
2916
2917 int MachNode::pd_alignment_required() const {
2918 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2919 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2920 return IntelJccErratum::largest_jcc_size() + 1;
2921 } else {
2922 return 1;
2923 }
2924 }
2925
2926 int MachNode::compute_padding(int current_offset) const {
2927 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2928 Compile* C = Compile::current();
2929 PhaseOutput* output = C->output();
2930 Block* block = output->block();
2931 int index = output->index();
2932 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2933 } else {
2934 return 0;
2935 }
2936 }
2937
2938 // Emit deopt handler code.
2939 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2940
2941 // Note that the code buffer's insts_mark is always relative to insts.
2942 // That's why we must use the macroassembler to generate a handler.
2943 address base = __ start_a_stub(size_deopt_handler());
2944 if (base == nullptr) {
2945 ciEnv::current()->record_failure("CodeCache is full");
2946 return 0; // CodeBuffer::expand failed
2947 }
2948 int offset = __ offset();
2949
2950 Label start;
2951 __ bind(start);
2952
2953 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2954
2955 int entry_offset = __ offset();
2956
2957 __ jmp(start);
2958
2959 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2960 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2961 "out of bounds read in post-call NOP check");
2962 __ end_a_stub();
2963 return entry_offset;
2964 }
2965
2966 static Assembler::Width widthForType(BasicType bt) {
2967 if (bt == T_BYTE) {
2968 return Assembler::B;
2969 } else if (bt == T_SHORT) {
2970 return Assembler::W;
2971 } else if (bt == T_INT) {
2972 return Assembler::D;
2973 } else {
2974 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2975 return Assembler::Q;
2976 }
2977 }
2978
2979 //=============================================================================
2980
2981 // Float masks come from different places depending on platform.
2982 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2983 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2984 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2985 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2986 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2987 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2988 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2989 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2990 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2991 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2992 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2993 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2994 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2995 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2996 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2997 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2998 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2999 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
3000 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
3001
3002 //=============================================================================
3003 bool Matcher::match_rule_supported(int opcode) {
3004 if (!has_match_rule(opcode)) {
3005 return false; // no match rule present
3006 }
3007 switch (opcode) {
3008 case Op_AbsVL:
3009 case Op_StoreVectorScatter:
3010 if (UseAVX < 3) {
3011 return false;
3012 }
3013 break;
3014 case Op_PopCountI:
3015 case Op_PopCountL:
3016 if (!UsePopCountInstruction) {
3017 return false;
3018 }
3019 break;
3020 case Op_PopCountVI:
3021 if (UseAVX < 2) {
3022 return false;
3023 }
3024 break;
3025 case Op_CompressV:
3026 case Op_ExpandV:
3027 case Op_PopCountVL:
3028 if (UseAVX < 2) {
3029 return false;
3030 }
3031 break;
3032 case Op_MulVI:
3033 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3034 return false;
3035 }
3036 break;
3037 case Op_MulVL:
3038 if (UseSSE < 4) { // only with SSE4_1 or AVX
3039 return false;
3040 }
3041 break;
3042 case Op_MulReductionVL:
3043 if (VM_Version::supports_avx512dq() == false) {
3044 return false;
3045 }
3046 break;
3047 case Op_AbsVB:
3048 case Op_AbsVS:
3049 case Op_AbsVI:
3050 case Op_AddReductionVI:
3051 case Op_AndReductionV:
3052 case Op_OrReductionV:
3053 case Op_XorReductionV:
3054 if (UseSSE < 3) { // requires at least SSSE3
3055 return false;
3056 }
3057 break;
3058 case Op_MaxHF:
3059 case Op_MinHF:
3060 if (!VM_Version::supports_avx512vlbw()) {
3061 return false;
3062 } // fallthrough
3063 case Op_AddHF:
3064 case Op_DivHF:
3065 case Op_FmaHF:
3066 case Op_MulHF:
3067 case Op_ReinterpretS2HF:
3068 case Op_ReinterpretHF2S:
3069 case Op_SubHF:
3070 case Op_SqrtHF:
3071 if (!VM_Version::supports_avx512_fp16()) {
3072 return false;
3073 }
3074 break;
3075 case Op_VectorLoadShuffle:
3076 case Op_VectorRearrange:
3077 case Op_MulReductionVI:
3078 if (UseSSE < 4) { // requires at least SSE4
3079 return false;
3080 }
3081 break;
3082 case Op_IsInfiniteF:
3083 case Op_IsInfiniteD:
3084 if (!VM_Version::supports_avx512dq()) {
3085 return false;
3086 }
3087 break;
3088 case Op_SqrtVD:
3089 case Op_SqrtVF:
3090 case Op_VectorMaskCmp:
3091 case Op_VectorCastB2X:
3092 case Op_VectorCastS2X:
3093 case Op_VectorCastI2X:
3094 case Op_VectorCastL2X:
3095 case Op_VectorCastF2X:
3096 case Op_VectorCastD2X:
3097 case Op_VectorUCastB2X:
3098 case Op_VectorUCastS2X:
3099 case Op_VectorUCastI2X:
3100 case Op_VectorMaskCast:
3101 if (UseAVX < 1) { // enabled for AVX only
3102 return false;
3103 }
3104 break;
3105 case Op_PopulateIndex:
3106 if (UseAVX < 2) {
3107 return false;
3108 }
3109 break;
3110 case Op_RoundVF:
3111 if (UseAVX < 2) { // enabled for AVX2 only
3112 return false;
3113 }
3114 break;
3115 case Op_RoundVD:
3116 if (UseAVX < 3) {
3117 return false; // enabled for AVX3 only
3118 }
3119 break;
3120 case Op_CompareAndSwapL:
3121 case Op_CompareAndSwapP:
3122 break;
3123 case Op_StrIndexOf:
3124 if (!UseSSE42Intrinsics) {
3125 return false;
3126 }
3127 break;
3128 case Op_StrIndexOfChar:
3129 if (!UseSSE42Intrinsics) {
3130 return false;
3131 }
3132 break;
3133 case Op_OnSpinWait:
3134 if (VM_Version::supports_on_spin_wait() == false) {
3135 return false;
3136 }
3137 break;
3138 case Op_MulVB:
3139 case Op_LShiftVB:
3140 case Op_RShiftVB:
3141 case Op_URShiftVB:
3142 case Op_VectorInsert:
3143 case Op_VectorLoadMask:
3144 case Op_VectorStoreMask:
3145 case Op_VectorBlend:
3146 if (UseSSE < 4) {
3147 return false;
3148 }
3149 break;
3150 case Op_MaxD:
3151 case Op_MaxF:
3152 case Op_MinD:
3153 case Op_MinF:
3154 if (UseAVX < 1) { // enabled for AVX only
3155 return false;
3156 }
3157 break;
3158 case Op_CacheWB:
3159 case Op_CacheWBPreSync:
3160 case Op_CacheWBPostSync:
3161 if (!VM_Version::supports_data_cache_line_flush()) {
3162 return false;
3163 }
3164 break;
3165 case Op_ExtractB:
3166 case Op_ExtractL:
3167 case Op_ExtractI:
3168 case Op_RoundDoubleMode:
3169 if (UseSSE < 4) {
3170 return false;
3171 }
3172 break;
3173 case Op_RoundDoubleModeV:
3174 if (VM_Version::supports_avx() == false) {
3175 return false; // 128bit vroundpd is not available
3176 }
3177 break;
3178 case Op_LoadVectorGather:
3179 case Op_LoadVectorGatherMasked:
3180 if (UseAVX < 2) {
3181 return false;
3182 }
3183 break;
3184 case Op_FmaF:
3185 case Op_FmaD:
3186 case Op_FmaVD:
3187 case Op_FmaVF:
3188 if (!UseFMA) {
3189 return false;
3190 }
3191 break;
3192 case Op_MacroLogicV:
3193 if (UseAVX < 3 || !UseVectorMacroLogic) {
3194 return false;
3195 }
3196 break;
3197
3198 case Op_VectorCmpMasked:
3199 case Op_VectorMaskGen:
3200 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3201 return false;
3202 }
3203 break;
3204 case Op_VectorMaskFirstTrue:
3205 case Op_VectorMaskLastTrue:
3206 case Op_VectorMaskTrueCount:
3207 case Op_VectorMaskToLong:
3208 if (UseAVX < 1) {
3209 return false;
3210 }
3211 break;
3212 case Op_RoundF:
3213 case Op_RoundD:
3214 break;
3215 case Op_CopySignD:
3216 case Op_CopySignF:
3217 if (UseAVX < 3) {
3218 return false;
3219 }
3220 if (!VM_Version::supports_avx512vl()) {
3221 return false;
3222 }
3223 break;
3224 case Op_CompressBits:
3225 case Op_ExpandBits:
3226 if (!VM_Version::supports_bmi2()) {
3227 return false;
3228 }
3229 break;
3230 case Op_CompressM:
3231 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3232 return false;
3233 }
3234 break;
3235 case Op_ConvF2HF:
3236 case Op_ConvHF2F:
3237 if (!VM_Version::supports_float16()) {
3238 return false;
3239 }
3240 break;
3241 case Op_VectorCastF2HF:
3242 case Op_VectorCastHF2F:
3243 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3244 return false;
3245 }
3246 break;
3247 }
3248 return true; // Match rules are supported by default.
3249 }
3250
3251 //------------------------------------------------------------------------
3252
3253 static inline bool is_pop_count_instr_target(BasicType bt) {
3254 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3255 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3256 }
3257
3258 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3259 return match_rule_supported_vector(opcode, vlen, bt);
3260 }
3261
// Identify extra cases in which we might want to provide match rules for vector nodes
// and other intrinsics guarded by vector length (vlen) and element type (bt).
3264 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3265 if (!match_rule_supported(opcode)) {
3266 return false;
3267 }
3268 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3269 // * SSE2 supports 128bit vectors for all types;
3270 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3271 // * AVX2 supports 256bit vectors for all types;
3272 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3273 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3274 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3275 // And MaxVectorSize is taken into account as well.
3276 if (!vector_size_supported(bt, vlen)) {
3277 return false;
3278 }
3279 // Special cases which require vector length follow:
3280 // * implementation limitations
3281 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3282 // * 128bit vroundpd instruction is present only in AVX1
3283 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3284 switch (opcode) {
3285 case Op_MaxVHF:
3286 case Op_MinVHF:
3287 if (!VM_Version::supports_avx512bw()) {
3288 return false;
      } // fallthrough
3290 case Op_AddVHF:
3291 case Op_DivVHF:
3292 case Op_FmaVHF:
3293 case Op_MulVHF:
3294 case Op_SubVHF:
3295 case Op_SqrtVHF:
3296 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3297 return false;
3298 }
3299 if (!VM_Version::supports_avx512_fp16()) {
3300 return false;
3301 }
3302 break;
3303 case Op_AbsVF:
3304 case Op_NegVF:
3305 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3306 return false; // 512bit vandps and vxorps are not available
3307 }
3308 break;
3309 case Op_AbsVD:
3310 case Op_NegVD:
3311 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3312 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3313 }
3314 break;
3315 case Op_RotateRightV:
3316 case Op_RotateLeftV:
3317 if (bt != T_INT && bt != T_LONG) {
3318 return false;
3319 } // fallthrough
3320 case Op_MacroLogicV:
3321 if (!VM_Version::supports_evex() ||
3322 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3323 return false;
3324 }
3325 break;
3326 case Op_ClearArray:
3327 case Op_VectorMaskGen:
3328 case Op_VectorCmpMasked:
3329 if (!VM_Version::supports_avx512bw()) {
3330 return false;
3331 }
3332 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3333 return false;
3334 }
3335 break;
3336 case Op_LoadVectorMasked:
3337 case Op_StoreVectorMasked:
3338 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3339 return false;
3340 }
3341 break;
3342 case Op_UMinV:
3343 case Op_UMaxV:
3344 if (UseAVX == 0) {
3345 return false;
3346 }
3347 break;
3348 case Op_UMinReductionV:
3349 case Op_UMaxReductionV:
3350 if (UseAVX == 0) {
3351 return false;
3352 }
3353 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3354 return false;
3355 }
3356 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3357 return false;
3358 }
3359 break;
3360 case Op_MaxV:
3361 case Op_MinV:
3362 if (UseSSE < 4 && is_integral_type(bt)) {
3363 return false;
3364 }
3365 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3366 // Float/Double intrinsics are enabled for AVX family currently.
3367 if (UseAVX == 0) {
3368 return false;
3369 }
3370 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3371 return false;
3372 }
3373 }
3374 break;
3375 case Op_CallLeafVector:
3376 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3377 return false;
3378 }
3379 break;
3380 case Op_AddReductionVI:
3381 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3382 return false;
3383 }
3384 // fallthrough
3385 case Op_AndReductionV:
3386 case Op_OrReductionV:
3387 case Op_XorReductionV:
3388 if (is_subword_type(bt) && (UseSSE < 4)) {
3389 return false;
3390 }
3391 break;
3392 case Op_MinReductionV:
3393 case Op_MaxReductionV:
3394 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3395 return false;
3396 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3397 return false;
3398 }
3399 // Float/Double intrinsics enabled for AVX family.
3400 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3401 return false;
3402 }
3403 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3404 return false;
3405 }
3406 break;
3407 case Op_VectorBlend:
3408 if (UseAVX == 0 && size_in_bits < 128) {
3409 return false;
3410 }
3411 break;
3412 case Op_VectorTest:
3413 if (UseSSE < 4) {
3414 return false; // Implementation limitation
3415 } else if (size_in_bits < 32) {
3416 return false; // Implementation limitation
3417 }
3418 break;
3419 case Op_VectorLoadShuffle:
3420 case Op_VectorRearrange:
      if (vlen == 2) {
3422 return false; // Implementation limitation due to how shuffle is loaded
3423 } else if (size_in_bits == 256 && UseAVX < 2) {
3424 return false; // Implementation limitation
3425 }
3426 break;
3427 case Op_VectorLoadMask:
3428 case Op_VectorMaskCast:
3429 if (size_in_bits == 256 && UseAVX < 2) {
3430 return false; // Implementation limitation
3431 }
3432 // fallthrough
3433 case Op_VectorStoreMask:
3434 if (vlen == 2) {
3435 return false; // Implementation limitation
3436 }
3437 break;
3438 case Op_PopulateIndex:
3439 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3440 return false;
3441 }
3442 break;
3443 case Op_VectorCastB2X:
3444 case Op_VectorCastS2X:
3445 case Op_VectorCastI2X:
3446 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3447 return false;
3448 }
3449 break;
3450 case Op_VectorCastL2X:
3451 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3452 return false;
3453 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3454 return false;
3455 }
3456 break;
3457 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types
      // happen after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
3461 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3462 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3463 return false;
3464 }
3465 }
3466 // fallthrough
3467 case Op_VectorCastD2X:
3468 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3469 return false;
3470 }
3471 break;
3472 case Op_VectorCastF2HF:
3473 case Op_VectorCastHF2F:
3474 if (!VM_Version::supports_f16c() &&
3475 ((!VM_Version::supports_evex() ||
3476 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3477 return false;
3478 }
3479 break;
3480 case Op_RoundVD:
3481 if (!VM_Version::supports_avx512dq()) {
3482 return false;
3483 }
3484 break;
3485 case Op_MulReductionVI:
3486 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3487 return false;
3488 }
3489 break;
3490 case Op_LoadVectorGatherMasked:
3491 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3492 return false;
3493 }
3494 if (is_subword_type(bt) &&
3495 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3496 (size_in_bits < 64) ||
3497 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3498 return false;
3499 }
3500 break;
3501 case Op_StoreVectorScatterMasked:
3502 case Op_StoreVectorScatter:
3503 if (is_subword_type(bt)) {
3504 return false;
3505 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3506 return false;
3507 }
3508 // fallthrough
3509 case Op_LoadVectorGather:
3510 if (!is_subword_type(bt) && size_in_bits == 64) {
3511 return false;
3512 }
3513 if (is_subword_type(bt) && size_in_bits < 64) {
3514 return false;
3515 }
3516 break;
3517 case Op_SaturatingAddV:
3518 case Op_SaturatingSubV:
3519 if (UseAVX < 1) {
3520 return false; // Implementation limitation
3521 }
3522 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3523 return false;
3524 }
3525 break;
3526 case Op_SelectFromTwoVector:
3527 if (size_in_bits < 128) {
3528 return false;
3529 }
3530 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3531 return false;
3532 }
3533 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3534 return false;
3535 }
3536 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3537 return false;
3538 }
3539 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3540 return false;
3541 }
3542 break;
3543 case Op_MaskAll:
3544 if (!VM_Version::supports_evex()) {
3545 return false;
3546 }
3547 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3548 return false;
3549 }
3550 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3551 return false;
3552 }
3553 break;
3554 case Op_VectorMaskCmp:
3555 if (vlen < 2 || size_in_bits < 32) {
3556 return false;
3557 }
3558 break;
3559 case Op_CompressM:
3560 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3561 return false;
3562 }
3563 break;
3564 case Op_CompressV:
3565 case Op_ExpandV:
3566 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3567 return false;
3568 }
      if (size_in_bits < 128) {
3570 return false;
3571 }
3572 case Op_VectorLongToMask:
3573 if (UseAVX < 1) {
3574 return false;
3575 }
3576 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3577 return false;
3578 }
3579 break;
3580 case Op_SignumVD:
3581 case Op_SignumVF:
3582 if (UseAVX < 1) {
3583 return false;
3584 }
3585 break;
3586 case Op_PopCountVI:
3587 case Op_PopCountVL: {
3588 if (!is_pop_count_instr_target(bt) &&
3589 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3590 return false;
3591 }
3592 }
3593 break;
3594 case Op_ReverseV:
3595 case Op_ReverseBytesV:
3596 if (UseAVX < 2) {
3597 return false;
3598 }
3599 break;
3600 case Op_CountTrailingZerosV:
3601 case Op_CountLeadingZerosV:
3602 if (UseAVX < 2) {
3603 return false;
3604 }
3605 break;
3606 }
  return true;  // Match rules are supported by default.
3608 }
3609
3610 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most of the unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation
  // patterns by returning false for all opcodes other than the ones whose
  // masked instruction patterns are defined in this file.
3617 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3618 return false;
3619 }
3620
3621 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3622 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3623 return false;
3624 }
3625 switch(opcode) {
3626 // Unary masked operations
3627 case Op_AbsVB:
3628 case Op_AbsVS:
3629 if(!VM_Version::supports_avx512bw()) {
3630 return false; // Implementation limitation
3631 }
3632 case Op_AbsVI:
3633 case Op_AbsVL:
3634 return true;
3635
3636 // Ternary masked operations
3637 case Op_FmaVF:
3638 case Op_FmaVD:
3639 return true;
3640
3641 case Op_MacroLogicV:
3642 if(bt != T_INT && bt != T_LONG) {
3643 return false;
3644 }
3645 return true;
3646
3647 // Binary masked operations
3648 case Op_AddVB:
3649 case Op_AddVS:
3650 case Op_SubVB:
3651 case Op_SubVS:
3652 case Op_MulVS:
3653 case Op_LShiftVS:
3654 case Op_RShiftVS:
3655 case Op_URShiftVS:
3656 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3657 if (!VM_Version::supports_avx512bw()) {
3658 return false; // Implementation limitation
3659 }
3660 return true;
3661
3662 case Op_MulVL:
3663 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3664 if (!VM_Version::supports_avx512dq()) {
3665 return false; // Implementation limitation
3666 }
3667 return true;
3668
3669 case Op_AndV:
3670 case Op_OrV:
3671 case Op_XorV:
3672 case Op_RotateRightV:
3673 case Op_RotateLeftV:
3674 if (bt != T_INT && bt != T_LONG) {
3675 return false; // Implementation limitation
3676 }
3677 return true;
3678
3679 case Op_VectorLoadMask:
3680 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3681 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3682 return false;
3683 }
3684 return true;
3685
3686 case Op_AddVI:
3687 case Op_AddVL:
3688 case Op_AddVF:
3689 case Op_AddVD:
3690 case Op_SubVI:
3691 case Op_SubVL:
3692 case Op_SubVF:
3693 case Op_SubVD:
3694 case Op_MulVI:
3695 case Op_MulVF:
3696 case Op_MulVD:
3697 case Op_DivVF:
3698 case Op_DivVD:
3699 case Op_SqrtVF:
3700 case Op_SqrtVD:
3701 case Op_LShiftVI:
3702 case Op_LShiftVL:
3703 case Op_RShiftVI:
3704 case Op_RShiftVL:
3705 case Op_URShiftVI:
3706 case Op_URShiftVL:
3707 case Op_LoadVectorMasked:
3708 case Op_StoreVectorMasked:
3709 case Op_LoadVectorGatherMasked:
3710 case Op_StoreVectorScatterMasked:
3711 return true;
3712
3713 case Op_UMinV:
3714 case Op_UMaxV:
3715 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3716 return false;
3717 } // fallthrough
3718 case Op_MaxV:
3719 case Op_MinV:
3720 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3721 return false; // Implementation limitation
3722 }
3723 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3724 return false; // Implementation limitation
3725 }
3726 return true;
3727 case Op_SaturatingAddV:
3728 case Op_SaturatingSubV:
3729 if (!is_subword_type(bt)) {
3730 return false;
3731 }
3732 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3733 return false; // Implementation limitation
3734 }
3735 return true;
3736
3737 case Op_VectorMaskCmp:
3738 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3739 return false; // Implementation limitation
3740 }
3741 return true;
3742
3743 case Op_VectorRearrange:
3744 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3745 return false; // Implementation limitation
3746 }
3747 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3748 return false; // Implementation limitation
3749 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3750 return false; // Implementation limitation
3751 }
3752 return true;
3753
3754 // Binary Logical operations
3755 case Op_AndVMask:
3756 case Op_OrVMask:
3757 case Op_XorVMask:
3758 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3759 return false; // Implementation limitation
3760 }
3761 return true;
3762
3763 case Op_PopCountVI:
3764 case Op_PopCountVL:
3765 if (!is_pop_count_instr_target(bt)) {
3766 return false;
3767 }
3768 return true;
3769
3770 case Op_MaskAll:
3771 return true;
3772
3773 case Op_CountLeadingZerosV:
3774 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3775 return true;
3776 }
3777 default:
3778 return false;
3779 }
3780 }
3781
3782 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3783 return false;
3784 }
3785
3786 // Return true if Vector::rearrange needs preparation of the shuffle argument
3787 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3788 switch (elem_bt) {
3789 case T_BYTE: return false;
3790 case T_SHORT: return !VM_Version::supports_avx512bw();
3791 case T_INT: return !VM_Version::supports_avx();
3792 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3793 default:
3794 ShouldNotReachHere();
3795 return false;
3796 }
3797 }
3798
3799 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3800 // Prefer predicate if the mask type is "TypeVectMask".
3801 return vt->isa_vectmask() != nullptr;
3802 }
3803
3804 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3805 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3806 bool legacy = (generic_opnd->opcode() == LEGVEC);
3807 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3808 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3809 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3810 return new legVecZOper();
3811 }
3812 if (legacy) {
3813 switch (ideal_reg) {
3814 case Op_VecS: return new legVecSOper();
3815 case Op_VecD: return new legVecDOper();
3816 case Op_VecX: return new legVecXOper();
3817 case Op_VecY: return new legVecYOper();
3818 case Op_VecZ: return new legVecZOper();
3819 }
3820 } else {
3821 switch (ideal_reg) {
3822 case Op_VecS: return new vecSOper();
3823 case Op_VecD: return new vecDOper();
3824 case Op_VecX: return new vecXOper();
3825 case Op_VecY: return new vecYOper();
3826 case Op_VecZ: return new vecZOper();
3827 }
3828 }
3829 ShouldNotReachHere();
3830 return nullptr;
3831 }
3832
3833 bool Matcher::is_reg2reg_move(MachNode* m) {
3834 switch (m->rule()) {
3835 case MoveVec2Leg_rule:
3836 case MoveLeg2Vec_rule:
3837 case MoveF2VL_rule:
3838 case MoveF2LEG_rule:
3839 case MoveVL2F_rule:
3840 case MoveLEG2F_rule:
3841 case MoveD2VL_rule:
3842 case MoveD2LEG_rule:
3843 case MoveVL2D_rule:
3844 case MoveLEG2D_rule:
3845 return true;
3846 default:
3847 return false;
3848 }
3849 }
3850
3851 bool Matcher::is_generic_vector(MachOper* opnd) {
3852 switch (opnd->opcode()) {
3853 case VEC:
3854 case LEGVEC:
3855 return true;
3856 default:
3857 return false;
3858 }
3859 }
3860
3861 //------------------------------------------------------------------------
3862
3863 const RegMask* Matcher::predicate_reg_mask(void) {
3864 return &_VECTMASK_REG_mask;
3865 }
3866
3867 // Max vector size in bytes. 0 if not supported.
3868 int Matcher::vector_width_in_bytes(BasicType bt) {
3869 assert(is_java_primitive(bt), "only primitive type vectors");
3870 // SSE2 supports 128bit vectors for all types.
3871 // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
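  // e.g. UseAVX == 2 -> (1 << 2) * 8 = 32 bytes, UseAVX == 3 -> 64 bytes;
  // SSE2/AVX1 default to 16 bytes here.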
3873 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3874 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3875 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3876 size = (UseAVX > 2) ? 64 : 32;
3877 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3878 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3879 // Use flag to limit vector size.
3880 size = MIN2(size,(int)MaxVectorSize);
3881 // Minimum 2 values in vector (or 4 for bytes).
3882 switch (bt) {
3883 case T_DOUBLE:
3884 case T_LONG:
3885 if (size < 16) return 0;
3886 break;
3887 case T_FLOAT:
3888 case T_INT:
3889 if (size < 8) return 0;
3890 break;
3891 case T_BOOLEAN:
3892 if (size < 4) return 0;
3893 break;
3894 case T_CHAR:
3895 if (size < 4) return 0;
3896 break;
3897 case T_BYTE:
3898 if (size < 4) return 0;
3899 break;
3900 case T_SHORT:
3901 if (size < 4) return 0;
3902 break;
3903 default:
3904 ShouldNotReachHere();
3905 }
3906 return size;
3907 }
3908
3909 // Limits on vector size (number of elements) loaded into vector.
3910 int Matcher::max_vector_size(const BasicType bt) {
3911 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3912 }
3913 int Matcher::min_vector_size(const BasicType bt) {
3914 int max_size = max_vector_size(bt);
3915 // Min size which can be loaded into vector is 4 bytes.
3916 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support calling SVML with single-element double vectors (Double64Vector).
3918 if (bt == T_DOUBLE) {
3919 size = 1;
3920 }
3921 return MIN2(size,max_size);
3922 }
3923
3924 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3925 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3926 // by default on Cascade Lake
3927 if (VM_Version::is_default_intel_cascade_lake()) {
3928 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3929 }
3930 return Matcher::max_vector_size(bt);
3931 }
3932
3933 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3934 return -1;
3935 }
3936
3937 // Vector ideal reg corresponding to specified size in bytes
3938 uint Matcher::vector_ideal_reg(int size) {
3939 assert(MaxVectorSize >= size, "");
3940 switch(size) {
3941 case 4: return Op_VecS;
3942 case 8: return Op_VecD;
3943 case 16: return Op_VecX;
3944 case 32: return Op_VecY;
3945 case 64: return Op_VecZ;
3946 }
3947 ShouldNotReachHere();
3948 return 0;
3949 }
3950
3951 // Check for shift by small constant as well
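// A left shift by a constant <= 3 can be subsumed into an addressing mode as
// the scale factor, e.g. (LShiftX index 3) becomes index*8 in
// [base + index*8 + disp].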
3952 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3953 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3954 shift->in(2)->get_int() <= 3 &&
3955 // Are there other uses besides address expressions?
3956 !matcher->is_visited(shift)) {
3957 address_visited.set(shift->_idx); // Flag as address_visited
3958 mstack.push(shift->in(2), Matcher::Visit);
3959 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
3963 if (conv->Opcode() == Op_ConvI2L &&
3964 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3965 // Are there other uses besides address expressions?
3966 !matcher->is_visited(conv)) {
3967 address_visited.set(conv->_idx); // Flag as address_visited
3968 mstack.push(conv->in(1), Matcher::Pre_Visit);
3969 } else {
3970 mstack.push(conv, Matcher::Pre_Visit);
3971 }
3972 return true;
3973 }
3974 return false;
3975 }
3976
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that they can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3981 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3982 // refers to the same node.
3983 //
3984 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3985 // This is a temporary solution until we make DAGs expressible in ADL.
3986 template<typename ConType>
3987 class FusedPatternMatcher {
3988 Node* _op1_node;
3989 Node* _mop_node;
3990 int _con_op;
3991
3992 static int match_next(Node* n, int next_op, int next_op_idx) {
3993 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3994 return -1;
3995 }
3996
3997 if (next_op_idx == -1) { // n is commutative, try rotations
3998 if (n->in(1)->Opcode() == next_op) {
3999 return 1;
4000 } else if (n->in(2)->Opcode() == next_op) {
4001 return 2;
4002 }
4003 } else {
4004 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
4005 if (n->in(next_op_idx)->Opcode() == next_op) {
4006 return next_op_idx;
4007 }
4008 }
4009 return -1;
4010 }
4011
4012 public:
4013 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4014 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4015
4016 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4017 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4018 typename ConType::NativeType con_value) {
4019 if (_op1_node->Opcode() != op1) {
4020 return false;
4021 }
4022 if (_mop_node->outcnt() > 2) {
4023 return false;
4024 }
4025 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4026 if (op1_op2_idx == -1) {
4027 return false;
4028 }
4029 // Memory operation must be the other edge
4030 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4031
4032 // Check that the mop node is really what we want
4033 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4034 Node* op2_node = _op1_node->in(op1_op2_idx);
4035 if (op2_node->outcnt() > 1) {
4036 return false;
4037 }
4038 assert(op2_node->Opcode() == op2, "Should be");
4039 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4040 if (op2_con_idx == -1) {
4041 return false;
4042 }
4043 // Memory operation must be the other edge
4044 int op2_mop_idx = (op2_con_idx & 1) + 1;
4045 // Check that the memory operation is the same node
4046 if (op2_node->in(op2_mop_idx) == _mop_node) {
4047 // Now check the constant
4048 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4049 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4050 return true;
4051 }
4052 }
4053 }
4054 return false;
4055 }
4056 };
4057
4058 static bool is_bmi_pattern(Node* n, Node* m) {
4059 assert(UseBMI1Instructions, "sanity");
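  // Patterns recognized below (the LoadL forms are the 64-bit equivalents):
  //   blsi  : x & (-x)     -> (AndI (SubI 0 load) load)
  //   blsr  : x & (x - 1)  -> (AndI (AddI load -1) load)
  //   blsmsk: x ^ (x - 1)  -> (XorI (AddI load -1) load)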
4060 if (n != nullptr && m != nullptr) {
4061 if (m->Opcode() == Op_LoadI) {
4062 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4063 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4064 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4065 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4066 } else if (m->Opcode() == Op_LoadL) {
4067 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4068 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4069 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4070 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4071 }
4072 }
4073 return false;
4074 }
4075
4076 // Should the matcher clone input 'm' of node 'n'?
4077 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4078 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4079 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4080 mstack.push(m, Visit);
4081 return true;
4082 }
4083 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4084 mstack.push(m, Visit); // m = ShiftCntV
4085 return true;
4086 }
4087 if (is_encode_and_store_pattern(n, m)) {
4088 mstack.push(m, Visit);
4089 return true;
4090 }
4091 return false;
4092 }
4093
4094 // Should the Matcher clone shifts on addressing modes, expecting them
4095 // to be subsumed into complex addressing expressions or compute them
4096 // into registers?
4097 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4098 Node *off = m->in(AddPNode::Offset);
4099 if (off->is_Con()) {
4100 address_visited.test_set(m->_idx); // Flag as address_visited
4101 Node *adr = m->in(AddPNode::Address);
4102
4103 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4104 // AtomicAdd is not an addressing expression.
4105 // Cheap to find it by looking for screwy base.
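    // e.g. base + (index << scale) + imm32 folds into a single
    // [base + index*scale + disp32] addressing mode.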
4106 if (adr->is_AddP() &&
4107 !adr->in(AddPNode::Base)->is_top() &&
4108 !adr->in(AddPNode::Offset)->is_Con() &&
4109 off->get_long() == (int) (off->get_long()) && // immL32
4110 // Are there other uses besides address expressions?
4111 !is_visited(adr)) {
4112 address_visited.set(adr->_idx); // Flag as address_visited
4113 Node *shift = adr->in(AddPNode::Offset);
4114 if (!clone_shift(shift, this, mstack, address_visited)) {
4115 mstack.push(shift, Pre_Visit);
4116 }
4117 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4118 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4119 } else {
4120 mstack.push(adr, Pre_Visit);
4121 }
4122
4123 // Clone X+offset as it also folds into most addressing expressions
4124 mstack.push(off, Visit);
4125 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4126 return true;
4127 } else if (clone_shift(off, this, mstack, address_visited)) {
4128 address_visited.test_set(m->_idx); // Flag as address_visited
4129 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4130 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4131 return true;
4132 }
4133 return false;
4134 }
4135
4136 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4137 switch (bt) {
4138 case BoolTest::eq:
4139 return Assembler::eq;
4140 case BoolTest::ne:
4141 return Assembler::neq;
4142 case BoolTest::le:
4143 case BoolTest::ule:
4144 return Assembler::le;
4145 case BoolTest::ge:
4146 case BoolTest::uge:
4147 return Assembler::nlt;
4148 case BoolTest::lt:
4149 case BoolTest::ult:
4150 return Assembler::lt;
4151 case BoolTest::gt:
4152 case BoolTest::ugt:
4153 return Assembler::nle;
4154 default : ShouldNotReachHere(); return Assembler::_false;
4155 }
4156 }
4157
4158 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4159 switch (bt) {
4160 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4161 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4162 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4163 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4164 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4165 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4166 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4167 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4168 }
4169 }
4170
4171 // Helper methods for MachSpillCopyNode::implementation().
4172 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4173 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4174 assert(ireg == Op_VecS || // 32bit vector
4175 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4176 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4177 "no non-adjacent vector moves" );
4178 if (masm) {
4179 switch (ireg) {
4180 case Op_VecS: // copy whole register
4181 case Op_VecD:
4182 case Op_VecX:
4183 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4184 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4185 } else {
4186 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4187 }
4188 break;
4189 case Op_VecY:
4190 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4191 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4192 } else {
4193 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4194 }
4195 break;
4196 case Op_VecZ:
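      // vector_len 2 == Assembler::AVX_512bit (copy the full 512-bit register)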
4197 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4198 break;
4199 default:
4200 ShouldNotReachHere();
4201 }
4202 #ifndef PRODUCT
4203 } else {
4204 switch (ireg) {
4205 case Op_VecS:
4206 case Op_VecD:
4207 case Op_VecX:
4208 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4209 break;
4210 case Op_VecY:
4211 case Op_VecZ:
4212 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4213 break;
4214 default:
4215 ShouldNotReachHere();
4216 }
4217 #endif
4218 }
4219 }
4220
4221 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4222 int stack_offset, int reg, uint ireg, outputStream* st) {
4223 if (masm) {
4224 if (is_load) {
4225 switch (ireg) {
4226 case Op_VecS:
4227 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4228 break;
4229 case Op_VecD:
4230 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4231 break;
4232 case Op_VecX:
4233 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4234 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4235 } else {
4236 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4237 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4238 }
4239 break;
4240 case Op_VecY:
4241 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4242 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4243 } else {
4244 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4245 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4246 }
4247 break;
4248 case Op_VecZ:
4249 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4250 break;
4251 default:
4252 ShouldNotReachHere();
4253 }
4254 } else { // store
4255 switch (ireg) {
4256 case Op_VecS:
4257 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4258 break;
4259 case Op_VecD:
4260 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4261 break;
4262 case Op_VecX:
4263 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4264 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4265 }
4266 else {
4267 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4268 }
4269 break;
4270 case Op_VecY:
4271 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4272 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4273 }
4274 else {
4275 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4276 }
4277 break;
4278 case Op_VecZ:
4279 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4280 break;
4281 default:
4282 ShouldNotReachHere();
4283 }
4284 }
4285 #ifndef PRODUCT
4286 } else {
4287 if (is_load) {
4288 switch (ireg) {
4289 case Op_VecS:
4290 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4291 break;
4292 case Op_VecD:
4293 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4294 break;
4295 case Op_VecX:
4296 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4297 break;
4298 case Op_VecY:
4299 case Op_VecZ:
4300 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4301 break;
4302 default:
4303 ShouldNotReachHere();
4304 }
4305 } else { // store
4306 switch (ireg) {
4307 case Op_VecS:
4308 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4309 break;
4310 case Op_VecD:
4311 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4312 break;
4313 case Op_VecX:
4314 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4315 break;
4316 case Op_VecY:
4317 case Op_VecZ:
4318 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4319 break;
4320 default:
4321 ShouldNotReachHere();
4322 }
4323 }
4324 #endif
4325 }
4326 }
4327
4328 template <class T>
4329 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4330 int size = type2aelembytes(bt) * len;
4331 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4332 for (int i = 0; i < len; i++) {
4333 int offset = i * type2aelembytes(bt);
4334 switch (bt) {
4335 case T_BYTE: val->at(i) = con; break;
4336 case T_SHORT: {
4337 jshort c = con;
4338 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4339 break;
4340 }
4341 case T_INT: {
4342 jint c = con;
4343 memcpy(val->adr_at(offset), &c, sizeof(jint));
4344 break;
4345 }
4346 case T_LONG: {
4347 jlong c = con;
4348 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4349 break;
4350 }
4351 case T_FLOAT: {
4352 jfloat c = con;
4353 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4354 break;
4355 }
4356 case T_DOUBLE: {
4357 jdouble c = con;
4358 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4359 break;
4360 }
4361 default: assert(false, "%s", type2name(bt));
4362 }
4363 }
4364 return val;
4365 }
4366
4367 static inline jlong high_bit_set(BasicType bt) {
4368 switch (bt) {
4369 case T_BYTE: return 0x8080808080808080;
4370 case T_SHORT: return 0x8000800080008000;
4371 case T_INT: return 0x8000000080000000;
4372 case T_LONG: return 0x8000000000000000;
4373 default:
4374 ShouldNotReachHere();
4375 return 0;
4376 }
4377 }
4378
4379 #ifndef PRODUCT
4380 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4381 st->print("nop \t# %d bytes pad for loops and calls", _count);
4382 }
4383 #endif
4384
4385 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4386 __ nop(_count);
4387 }
4388
4389 uint MachNopNode::size(PhaseRegAlloc*) const {
4390 return _count;
4391 }
4392
4393 #ifndef PRODUCT
4394 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4395 st->print("# breakpoint");
4396 }
4397 #endif
4398
4399 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4400 __ int3();
4401 }
4402
4403 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4404 return MachNode::size(ra_);
4405 }
4406
4407 %}
4408
4409 //----------ENCODING BLOCK-----------------------------------------------------
4410 // This block specifies the encoding classes used by the compiler to
4411 // output byte streams. Encoding classes are parameterized macros
4412 // used by Machine Instruction Nodes in order to generate the bit
4413 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
4417 // which returns its register number when queried. CONST_INTER causes
4418 // an operand to generate a function which returns the value of the
4419 // constant when queried. MEMORY_INTER causes an operand to generate
4420 // four functions which return the Base Register, the Index Register,
4421 // the Scale Value, and the Offset Value of the operand when queried.
4422 // COND_INTER causes an operand to generate six functions which return
4423 // the encoding code (ie - encoding bits for the instruction)
4424 // associated with each basic boolean condition for a conditional
4425 // instruction.
4426 //
4427 // Instructions specify two basic values for encoding. Again, a
4428 // function is available to check if the constant displacement is an
4429 // oop. They use the ins_encode keyword to specify their encoding
4430 // classes (which must be a sequence of enc_class names, and their
4431 // parameters, specified in the encoding block), and they use the
4432 // opcode keyword to specify, in order, their primary, secondary, and
4433 // tertiary opcode. Only the opcode sections which a particular
4434 // instruction needs for encoding need to be specified.
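//
// Purely illustrative sketch (a hypothetical rule, not one of the definitions in
// this file) of how an instruct ties the pieces together: it names operands
// defined later in this file and refers to the cdql_enc enc_class defined just
// below via ins_encode:
//
//   instruct exampleDivI(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
//   %{
//     match(Set rax (DivI rax div));
//     effect(KILL rdx, KILL cr);
//     ins_encode(cdql_enc(div));     // expands to the enc_class below
//     ins_pipe(ialu_reg_reg_alu0);
//   %}
//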
4435 encode %{
4436 enc_class cdql_enc(no_rax_rdx_RegI div)
4437 %{
4438 // Full implementation of Java idiv and irem; checks for
4439 // special case as described in JVM spec., p.243 & p.271.
4440 //
4441 // normal case special case
4442 //
4443 // input : rax: dividend min_int
4444 // reg: divisor -1
4445 //
4446 // output: rax: quotient (= rax idiv reg) min_int
4447 // rdx: remainder (= rax irem reg) 0
4448 //
4449 // Code sequence:
4450 //
4451 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4452 // 5: 75 07/08 jne e <normal>
4453 // 7: 33 d2 xor %edx,%edx
4454 // [div >= 8 -> offset + 1]
4455 // [REX_B]
4456 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4457 // c: 74 03/04 je 11 <done>
4458 // 000000000000000e <normal>:
4459 // e: 99 cltd
4460 // [div >= 8 -> offset + 1]
4461 // [REX_B]
4462 // f: f7 f9 idiv $div
4463 // 0000000000000011 <done>:
4464 Label normal;
4465 Label done;
4466
4467 // cmp $0x80000000,%eax
4468 __ cmpl(as_Register(RAX_enc), 0x80000000);
4469
4470 // jne e <normal>
4471 __ jccb(Assembler::notEqual, normal);
4472
4473 // xor %edx,%edx
4474 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4475
4476 // cmp $0xffffffffffffffff,$div
4477 __ cmpl($div$$Register, -1);
4478
4479 // je 11 <done>
4480 __ jccb(Assembler::equal, done);
4481
4482 // <normal>
4483 // cltd
4484 __ bind(normal);
4485 __ cdql();
4486
4487 // idivl
4488 // <done>
4489 __ idivl($div$$Register);
4490 __ bind(done);
4491 %}
4492
4493 enc_class cdqq_enc(no_rax_rdx_RegL div)
4494 %{
4495 // Full implementation of Java ldiv and lrem; checks for
4496 // special case as described in JVM spec., p.243 & p.271.
4497 //
4498 // normal case special case
4499 //
4500 // input : rax: dividend min_long
4501 // reg: divisor -1
4502 //
4503 // output: rax: quotient (= rax idiv reg) min_long
4504 // rdx: remainder (= rax irem reg) 0
4505 //
4506 // Code sequence:
4507 //
4508 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4509 // 7: 00 00 80
4510 // a: 48 39 d0 cmp %rdx,%rax
4511 // d: 75 08 jne 17 <normal>
4512 // f: 33 d2 xor %edx,%edx
4513 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4514 // 15: 74 05 je 1c <done>
4515 // 0000000000000017 <normal>:
4516 // 17: 48 99 cqto
4517 // 19: 48 f7 f9 idiv $div
4518 // 000000000000001c <done>:
4519 Label normal;
4520 Label done;
4521
4522 // mov $0x8000000000000000,%rdx
4523 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4524
4525 // cmp %rdx,%rax
4526 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4527
4528 // jne 17 <normal>
4529 __ jccb(Assembler::notEqual, normal);
4530
4531 // xor %edx,%edx
4532 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4533
4534 // cmp $0xffffffffffffffff,$div
4535 __ cmpq($div$$Register, -1);
4536
4537 // je 1c <done>
4538 __ jccb(Assembler::equal, done);
4539
4540 // <normal>
4541 // cqto
4542 __ bind(normal);
4543 __ cdqq();
4544
4545 // idivq
4546 // <done>
4547 __ idivq($div$$Register);
4548 __ bind(done);
4549 %}
4550
4551 enc_class clear_avx %{
4552 DEBUG_ONLY(int off0 = __ offset());
4553 if (generate_vzeroupper(Compile::current())) {
4554 // Clear upper bits of YMM registers when current compiled code uses
4555 // wide vectors, to avoid the AVX <-> SSE transition penalty during the
4556 // call.
4557 __ vzeroupper();
4558 }
4559 DEBUG_ONLY(int off1 = __ offset());
4560 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4561 %}
4562
4563 enc_class Java_To_Runtime(method meth) %{
4564 __ lea(r10, RuntimeAddress((address)$meth$$method));
4565 __ call(r10);
4566 __ post_call_nop();
4567 %}
4568
4569 enc_class Java_Static_Call(method meth)
4570 %{
4571 // JAVA STATIC CALL
4572 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4573 // determine who we intended to call.
4574 if (!_method) {
4575 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4576 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4577 // The NOP here is purely to ensure that eliding a call to
4578 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4579 __ addr_nop_5();
4580 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4581 } else {
4582 int method_index = resolved_method_index(masm);
4583 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4584 : static_call_Relocation::spec(method_index);
4585 address mark = __ pc();
4586 int call_offset = __ offset();
4587 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4588 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4589 // Calls of the same statically bound method can share
4590 // a stub to the interpreter.
4591 __ code()->shared_stub_to_interp_for(_method, call_offset);
4592 } else {
4593 // Emit stubs for static call.
4594 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4595 __ clear_inst_mark();
4596 if (stub == nullptr) {
4597 ciEnv::current()->record_failure("CodeCache is full");
4598 return;
4599 }
4600 }
4601 }
4602 __ post_call_nop();
4603 %}
4604
4605 enc_class Java_Dynamic_Call(method meth) %{
4606 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4607 __ post_call_nop();
4608 %}
4609
4610 enc_class call_epilog %{
4611 if (VerifyStackAtCalls) {
4612 // Check that stack depth is unchanged: find magic cookie on stack
4613 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4614 Label L;
4615 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4616 __ jccb(Assembler::equal, L);
4617 // Die if stack mismatch
4618 __ int3();
4619 __ bind(L);
4620 }
4621 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4622 // The last return value is not set by the callee but is used to pass the null marker to compiled code.
4623 // Search for the corresponding projection, get the register, and emit code that initializes it.
4624 uint con = (tf()->range_cc()->cnt() - 1);
4625 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4626 ProjNode* proj = fast_out(i)->as_Proj();
4627 if (proj->_con == con) {
4628 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4629 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4630 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4631 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4632 __ testq(rax, rax);
4633 __ setb(Assembler::notZero, toReg);
4634 __ movzbl(toReg, toReg);
4635 if (reg->is_stack()) {
4636 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4637 __ movq(Address(rsp, st_off), toReg);
4638 }
4639 break;
4640 }
4641 }
4642 if (return_value_is_used()) {
4643 // An inline type is returned as fields in multiple registers.
4644 // Rax either contains an oop if the inline type is buffered or a pointer
4645 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4646 // if the lowest bit is set to allow C2 to use the oop after null checking.
4647 // rax &= (rax & 1) - 1
4648 __ movptr(rscratch1, rax);
4649 __ andptr(rscratch1, 0x1);
4650 __ subptr(rscratch1, 0x1);
4651 __ andptr(rax, rscratch1);
4652 }
4653 }
4654 %}
4655
4656 %}
4657
4658 //----------FRAME--------------------------------------------------------------
4659 // Definition of frame structure and management information.
4660 //
4661 // S T A C K L A Y O U T Allocators stack-slot number
4662 // | (to get allocators register number
4663 // G Owned by | | v add OptoReg::stack0())
4664 // r CALLER | |
4665 // o | +--------+ pad to even-align allocators stack-slot
4666 // w V | pad0 | numbers; owned by CALLER
4667 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4668 // h ^ | in | 5
4669 // | | args | 4 Holes in incoming args owned by SELF
4670 // | | | | 3
4671 // | | +--------+
4672 // V | | old out| Empty on Intel, window on Sparc
4673 // | old |preserve| Must be even aligned.
4674 // | SP-+--------+----> Matcher::_old_SP, even aligned
4675 // | | in | 3 area for Intel ret address
4676 // Owned by |preserve| Empty on Sparc.
4677 // SELF +--------+
4678 // | | pad2 | 2 pad to align old SP
4679 // | +--------+ 1
4680 // | | locks | 0
4681 // | +--------+----> OptoReg::stack0(), even aligned
4682 // | | pad1 | 11 pad to align new SP
4683 // | +--------+
4684 // | | | 10
4685 // | | spills | 9 spills
4686 // V | | 8 (pad0 slot for callee)
4687 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4688 // ^ | out | 7
4689 // | | args | 6 Holes in outgoing args owned by CALLEE
4690 // Owned by +--------+
4691 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4692 // | new |preserve| Must be even-aligned.
4693 // | SP-+--------+----> Matcher::_new_SP, even aligned
4694 // | | |
4695 //
4696 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4697 // known from SELF's arguments and the Java calling convention.
4698 // Region 6-7 is determined per call site.
4699 // Note 2: If the calling convention leaves holes in the incoming argument
4700 // area, those holes are owned by SELF. Holes in the outgoing area
4701 // are owned by the CALLEE. Holes should not be necessary in the
4702 // incoming area, as the Java calling convention is completely under
4703 // the control of the AD file. Doubles can be sorted and packed to
4704 // avoid holes. Holes in the outgoing arguments may be necessary for
4705 // varargs C calling conventions.
4706 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4707 // even aligned with pad0 as needed.
4708 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4709 // region 6-11 is even aligned; it may be padded out more so that
4710 // the region from SP to FP meets the minimum stack alignment.
4711 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4712 // alignment. Region 11, pad1, may be dynamically extended so that
4713 // SP meets the minimum alignment.
4714
4715 frame
4716 %{
4717 // This register defines part of the calling convention
4718 // between compiled code and the interpreter.
4719 inline_cache_reg(RAX); // Inline Cache Register
4720
4721 // Optional: name the operand used by cisc-spilling to access
4722 // [stack_pointer + offset]
4723 cisc_spilling_operand_name(indOffset32);
4724
4725 // Number of stack slots consumed by locking an object
4726 sync_stack_slots(2);
4727
4728 // Compiled code's Frame Pointer
4729 frame_pointer(RSP);
4730
4731 // Stack alignment requirement
4732 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4733
4734 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4735 // for calls to C. Supports the var-args backing area for register parms.
4736 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4737
4738 // The after-PROLOG location of the return address. Location of
4739 // return address specifies a type (REG or STACK) and a number
4740 // representing the register number (i.e. - use a register name) or
4741 // stack slot.
4742 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4743 // Otherwise, it is above the locks and verification slot and alignment word
4744 return_addr(STACK - 2 +
4745 align_up((Compile::current()->in_preserve_stack_slots() +
4746 Compile::current()->fixed_slots()),
4747 stack_alignment_in_slots()));
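// Worked example (illustrative numbers only): with 4 in-preserve stack slots,
// 2 fixed slots, and 16-byte stack alignment (4 slots per alignment unit),
// align_up(4 + 2, 4) = 8, so the return address is at stack slot 8 - 2 = 6.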
4748
4749 // Location of compiled Java return values. Same as C for now.
4750 return_value
4751 %{
4752 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4753 "only return normal values");
4754
4755 static const int lo[Op_RegL + 1] = {
4756 0,
4757 0,
4758 RAX_num, // Op_RegN
4759 RAX_num, // Op_RegI
4760 RAX_num, // Op_RegP
4761 XMM0_num, // Op_RegF
4762 XMM0_num, // Op_RegD
4763 RAX_num // Op_RegL
4764 };
4765 static const int hi[Op_RegL + 1] = {
4766 0,
4767 0,
4768 OptoReg::Bad, // Op_RegN
4769 OptoReg::Bad, // Op_RegI
4770 RAX_H_num, // Op_RegP
4771 OptoReg::Bad, // Op_RegF
4772 XMM0b_num, // Op_RegD
4773 RAX_H_num // Op_RegL
4774 };
4775 // Excluded flags and vector registers.
4776 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4777 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4778 %}
4779 %}
4780
4781 //----------ATTRIBUTES---------------------------------------------------------
4782 //----------Operand Attributes-------------------------------------------------
4783 op_attrib op_cost(0); // Required cost attribute
4784
4785 //----------Instruction Attributes---------------------------------------------
4786 ins_attrib ins_cost(100); // Required cost attribute
4787 ins_attrib ins_size(8); // Required size attribute (in bits)
4788 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4789 // a non-matching short branch variant
4790 // of some long branch?
4791 ins_attrib ins_alignment(1); // Required alignment attribute (must
4792 // be a power of 2) specifies the
4793 // alignment that some part of the
4794 // instruction (not necessarily the
4795 // start) requires. If > 1, a
4796 // compute_padding() function must be
4797 // provided for the instruction
4798
4799 // Whether this node is expanded during code emission into a sequence of
4800 // instructions and the first instruction can perform an implicit null check.
4801 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4802
4803 //----------OPERANDS-----------------------------------------------------------
4804 // Operand definitions must precede instruction definitions for correct parsing
4805 // in the ADLC because operands constitute user defined types which are used in
4806 // instruction definitions.
4807
4808 //----------Simple Operands----------------------------------------------------
4809 // Immediate Operands
4810 // Integer Immediate
4811 operand immI()
4812 %{
4813 match(ConI);
4814
4815 op_cost(10);
4816 format %{ %}
4817 interface(CONST_INTER);
4818 %}
4819
4820 // Constant for test vs zero
4821 operand immI_0()
4822 %{
4823 predicate(n->get_int() == 0);
4824 match(ConI);
4825
4826 op_cost(0);
4827 format %{ %}
4828 interface(CONST_INTER);
4829 %}
4830
4831 // Constant for increment
4832 operand immI_1()
4833 %{
4834 predicate(n->get_int() == 1);
4835 match(ConI);
4836
4837 op_cost(0);
4838 format %{ %}
4839 interface(CONST_INTER);
4840 %}
4841
4842 // Constant for decrement
4843 operand immI_M1()
4844 %{
4845 predicate(n->get_int() == -1);
4846 match(ConI);
4847
4848 op_cost(0);
4849 format %{ %}
4850 interface(CONST_INTER);
4851 %}
4852
4853 operand immI_2()
4854 %{
4855 predicate(n->get_int() == 2);
4856 match(ConI);
4857
4858 op_cost(0);
4859 format %{ %}
4860 interface(CONST_INTER);
4861 %}
4862
4863 operand immI_4()
4864 %{
4865 predicate(n->get_int() == 4);
4866 match(ConI);
4867
4868 op_cost(0);
4869 format %{ %}
4870 interface(CONST_INTER);
4871 %}
4872
4873 operand immI_8()
4874 %{
4875 predicate(n->get_int() == 8);
4876 match(ConI);
4877
4878 op_cost(0);
4879 format %{ %}
4880 interface(CONST_INTER);
4881 %}
4882
4883 // Valid scale values for addressing modes
4884 operand immI2()
4885 %{
4886 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4887 match(ConI);
4888
4889 format %{ %}
4890 interface(CONST_INTER);
4891 %}
4892
4893 operand immU7()
4894 %{
4895 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4896 match(ConI);
4897
4898 op_cost(5);
4899 format %{ %}
4900 interface(CONST_INTER);
4901 %}
4902
4903 operand immI8()
4904 %{
4905 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4906 match(ConI);
4907
4908 op_cost(5);
4909 format %{ %}
4910 interface(CONST_INTER);
4911 %}
4912
4913 operand immU8()
4914 %{
4915 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4916 match(ConI);
4917
4918 op_cost(5);
4919 format %{ %}
4920 interface(CONST_INTER);
4921 %}
4922
4923 operand immI16()
4924 %{
4925 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4926 match(ConI);
4927
4928 op_cost(10);
4929 format %{ %}
4930 interface(CONST_INTER);
4931 %}
4932
4933 // Int Immediate non-negative
4934 operand immU31()
4935 %{
4936 predicate(n->get_int() >= 0);
4937 match(ConI);
4938
4939 op_cost(0);
4940 format %{ %}
4941 interface(CONST_INTER);
4942 %}
4943
4944 // Pointer Immediate
4945 operand immP()
4946 %{
4947 match(ConP);
4948
4949 op_cost(10);
4950 format %{ %}
4951 interface(CONST_INTER);
4952 %}
4953
4954 // Null Pointer Immediate
4955 operand immP0()
4956 %{
4957 predicate(n->get_ptr() == 0);
4958 match(ConP);
4959
4960 op_cost(5);
4961 format %{ %}
4962 interface(CONST_INTER);
4963 %}
4964
4965 // Pointer Immediate
4966 operand immN() %{
4967 match(ConN);
4968
4969 op_cost(10);
4970 format %{ %}
4971 interface(CONST_INTER);
4972 %}
4973
4974 operand immNKlass() %{
4975 match(ConNKlass);
4976
4977 op_cost(10);
4978 format %{ %}
4979 interface(CONST_INTER);
4980 %}
4981
4982 // Null Pointer Immediate
4983 operand immN0() %{
4984 predicate(n->get_narrowcon() == 0);
4985 match(ConN);
4986
4987 op_cost(5);
4988 format %{ %}
4989 interface(CONST_INTER);
4990 %}
4991
4992 operand immP31()
4993 %{
4994 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4995 && (n->get_ptr() >> 31) == 0);
4996 match(ConP);
4997
4998 op_cost(5);
4999 format %{ %}
5000 interface(CONST_INTER);
5001 %}
5002
5003
5004 // Long Immediate
5005 operand immL()
5006 %{
5007 match(ConL);
5008
5009 op_cost(20);
5010 format %{ %}
5011 interface(CONST_INTER);
5012 %}
5013
5014 // Long Immediate 8-bit
5015 operand immL8()
5016 %{
5017 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
5018 match(ConL);
5019
5020 op_cost(5);
5021 format %{ %}
5022 interface(CONST_INTER);
5023 %}
5024
5025 // Long Immediate 32-bit unsigned
5026 operand immUL32()
5027 %{
5028 predicate(n->get_long() == (unsigned int) (n->get_long()));
5029 match(ConL);
5030
5031 op_cost(10);
5032 format %{ %}
5033 interface(CONST_INTER);
5034 %}
5035
5036 // Long Immediate 32-bit signed
5037 operand immL32()
5038 %{
5039 predicate(n->get_long() == (int) (n->get_long()));
5040 match(ConL);
5041
5042 op_cost(15);
5043 format %{ %}
5044 interface(CONST_INTER);
5045 %}
5046
5047 operand immL_Pow2()
5048 %{
5049 predicate(is_power_of_2((julong)n->get_long()));
5050 match(ConL);
5051
5052 op_cost(15);
5053 format %{ %}
5054 interface(CONST_INTER);
5055 %}
5056
5057 operand immL_NotPow2()
5058 %{
5059 predicate(is_power_of_2((julong)~n->get_long()));
5060 match(ConL);
5061
5062 op_cost(15);
5063 format %{ %}
5064 interface(CONST_INTER);
5065 %}
5066
5067 // Long Immediate zero
5068 operand immL0()
5069 %{
5070 predicate(n->get_long() == 0L);
5071 match(ConL);
5072
5073 op_cost(10);
5074 format %{ %}
5075 interface(CONST_INTER);
5076 %}
5077
5078 // Constant for increment
5079 operand immL1()
5080 %{
5081 predicate(n->get_long() == 1);
5082 match(ConL);
5083
5084 format %{ %}
5085 interface(CONST_INTER);
5086 %}
5087
5088 // Constant for decrement
5089 operand immL_M1()
5090 %{
5091 predicate(n->get_long() == -1);
5092 match(ConL);
5093
5094 format %{ %}
5095 interface(CONST_INTER);
5096 %}
5097
5098 // Long Immediate: low 32-bit mask
5099 operand immL_32bits()
5100 %{
5101 predicate(n->get_long() == 0xFFFFFFFFL);
5102 match(ConL);
5103 op_cost(20);
5104
5105 format %{ %}
5106 interface(CONST_INTER);
5107 %}
5108
5109 // Int Immediate: 2^n-1, positive
5110 operand immI_Pow2M1()
5111 %{
5112 predicate((n->get_int() > 0)
5113 && is_power_of_2((juint)n->get_int() + 1));
5114 match(ConI);
5115
5116 op_cost(20);
5117 format %{ %}
5118 interface(CONST_INTER);
5119 %}
5120
5121 // Float Immediate zero
5122 operand immF0()
5123 %{
5124 predicate(jint_cast(n->getf()) == 0);
5125 match(ConF);
5126
5127 op_cost(5);
5128 format %{ %}
5129 interface(CONST_INTER);
5130 %}
5131
5132 // Float Immediate
5133 operand immF()
5134 %{
5135 match(ConF);
5136
5137 op_cost(15);
5138 format %{ %}
5139 interface(CONST_INTER);
5140 %}
5141
5142 // Half Float Immediate
5143 operand immH()
5144 %{
5145 match(ConH);
5146
5147 op_cost(15);
5148 format %{ %}
5149 interface(CONST_INTER);
5150 %}
5151
5152 // Double Immediate zero
5153 operand immD0()
5154 %{
5155 predicate(jlong_cast(n->getd()) == 0);
5156 match(ConD);
5157
5158 op_cost(5);
5159 format %{ %}
5160 interface(CONST_INTER);
5161 %}
5162
5163 // Double Immediate
5164 operand immD()
5165 %{
5166 match(ConD);
5167
5168 op_cost(15);
5169 format %{ %}
5170 interface(CONST_INTER);
5171 %}
5172
5173 // Immediates for special shifts (sign extend)
5174
5175 // Constants for increment
5176 operand immI_16()
5177 %{
5178 predicate(n->get_int() == 16);
5179 match(ConI);
5180
5181 format %{ %}
5182 interface(CONST_INTER);
5183 %}
5184
5185 operand immI_24()
5186 %{
5187 predicate(n->get_int() == 24);
5188 match(ConI);
5189
5190 format %{ %}
5191 interface(CONST_INTER);
5192 %}
5193
5194 // Constant for byte-wide masking
5195 operand immI_255()
5196 %{
5197 predicate(n->get_int() == 255);
5198 match(ConI);
5199
5200 format %{ %}
5201 interface(CONST_INTER);
5202 %}
5203
5204 // Constant for short-wide masking
5205 operand immI_65535()
5206 %{
5207 predicate(n->get_int() == 65535);
5208 match(ConI);
5209
5210 format %{ %}
5211 interface(CONST_INTER);
5212 %}
5213
5214 // Constant for byte-wide masking
5215 operand immL_255()
5216 %{
5217 predicate(n->get_long() == 255);
5218 match(ConL);
5219
5220 format %{ %}
5221 interface(CONST_INTER);
5222 %}
5223
5224 // Constant for short-wide masking
5225 operand immL_65535()
5226 %{
5227 predicate(n->get_long() == 65535);
5228 match(ConL);
5229
5230 format %{ %}
5231 interface(CONST_INTER);
5232 %}
5233
5234 // AOT Runtime Constants Address
5235 operand immAOTRuntimeConstantsAddress()
5236 %{
5237 // Check if the address is in the range of AOT Runtime Constants
5238 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5239 match(ConP);
5240
5241 op_cost(0);
5242 format %{ %}
5243 interface(CONST_INTER);
5244 %}
5245
5246 operand kReg()
5247 %{
5248 constraint(ALLOC_IN_RC(vectmask_reg));
5249 match(RegVectMask);
5250 format %{%}
5251 interface(REG_INTER);
5252 %}
5253
5254 // Register Operands
5255 // Integer Register
5256 operand rRegI()
5257 %{
5258 constraint(ALLOC_IN_RC(int_reg));
5259 match(RegI);
5260
5261 match(rax_RegI);
5262 match(rbx_RegI);
5263 match(rcx_RegI);
5264 match(rdx_RegI);
5265 match(rdi_RegI);
5266
5267 format %{ %}
5268 interface(REG_INTER);
5269 %}
5270
5271 // Special Registers
5272 operand rax_RegI()
5273 %{
5274 constraint(ALLOC_IN_RC(int_rax_reg));
5275 match(RegI);
5276 match(rRegI);
5277
5278 format %{ "RAX" %}
5279 interface(REG_INTER);
5280 %}
5281
5282 // Special Registers
5283 operand rbx_RegI()
5284 %{
5285 constraint(ALLOC_IN_RC(int_rbx_reg));
5286 match(RegI);
5287 match(rRegI);
5288
5289 format %{ "RBX" %}
5290 interface(REG_INTER);
5291 %}
5292
5293 operand rcx_RegI()
5294 %{
5295 constraint(ALLOC_IN_RC(int_rcx_reg));
5296 match(RegI);
5297 match(rRegI);
5298
5299 format %{ "RCX" %}
5300 interface(REG_INTER);
5301 %}
5302
5303 operand rdx_RegI()
5304 %{
5305 constraint(ALLOC_IN_RC(int_rdx_reg));
5306 match(RegI);
5307 match(rRegI);
5308
5309 format %{ "RDX" %}
5310 interface(REG_INTER);
5311 %}
5312
5313 operand rdi_RegI()
5314 %{
5315 constraint(ALLOC_IN_RC(int_rdi_reg));
5316 match(RegI);
5317 match(rRegI);
5318
5319 format %{ "RDI" %}
5320 interface(REG_INTER);
5321 %}
5322
5323 operand no_rax_rdx_RegI()
5324 %{
5325 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5326 match(RegI);
5327 match(rbx_RegI);
5328 match(rcx_RegI);
5329 match(rdi_RegI);
5330
5331 format %{ %}
5332 interface(REG_INTER);
5333 %}
5334
5335 operand no_rbp_r13_RegI()
5336 %{
5337 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5338 match(RegI);
5339 match(rRegI);
5340 match(rax_RegI);
5341 match(rbx_RegI);
5342 match(rcx_RegI);
5343 match(rdx_RegI);
5344 match(rdi_RegI);
5345
5346 format %{ %}
5347 interface(REG_INTER);
5348 %}
5349
5350 // Pointer Register
5351 operand any_RegP()
5352 %{
5353 constraint(ALLOC_IN_RC(any_reg));
5354 match(RegP);
5355 match(rax_RegP);
5356 match(rbx_RegP);
5357 match(rdi_RegP);
5358 match(rsi_RegP);
5359 match(rbp_RegP);
5360 match(r15_RegP);
5361 match(rRegP);
5362
5363 format %{ %}
5364 interface(REG_INTER);
5365 %}
5366
5367 operand rRegP()
5368 %{
5369 constraint(ALLOC_IN_RC(ptr_reg));
5370 match(RegP);
5371 match(rax_RegP);
5372 match(rbx_RegP);
5373 match(rdi_RegP);
5374 match(rsi_RegP);
5375 match(rbp_RegP); // See Q&A below about
5376 match(r15_RegP); // r15_RegP and rbp_RegP.
5377
5378 format %{ %}
5379 interface(REG_INTER);
5380 %}
5381
5382 operand rRegN() %{
5383 constraint(ALLOC_IN_RC(int_reg));
5384 match(RegN);
5385
5386 format %{ %}
5387 interface(REG_INTER);
5388 %}
5389
5390 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5391 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5392 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5393 // The output of an instruction is controlled by the allocator, which respects
5394 // register class masks, not match rules. Unless an instruction mentions
5395 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5396 // by the allocator as an input.
5397 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5398 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5399 // result, RBP is not included in the output of the instruction either.
5400
5401 // This operand is not allowed to use RBP even if
5402 // RBP is not used to hold the frame pointer.
5403 operand no_rbp_RegP()
5404 %{
5405 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5406 match(RegP);
5407 match(rbx_RegP);
5408 match(rsi_RegP);
5409 match(rdi_RegP);
5410
5411 format %{ %}
5412 interface(REG_INTER);
5413 %}
5414
5415 // Special Registers
5416 // Return a pointer value
5417 operand rax_RegP()
5418 %{
5419 constraint(ALLOC_IN_RC(ptr_rax_reg));
5420 match(RegP);
5421 match(rRegP);
5422
5423 format %{ %}
5424 interface(REG_INTER);
5425 %}
5426
5427 // Special Registers
5428 // Return a compressed pointer value
5429 operand rax_RegN()
5430 %{
5431 constraint(ALLOC_IN_RC(int_rax_reg));
5432 match(RegN);
5433 match(rRegN);
5434
5435 format %{ %}
5436 interface(REG_INTER);
5437 %}
5438
5439 // Used in AtomicAdd
5440 operand rbx_RegP()
5441 %{
5442 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5443 match(RegP);
5444 match(rRegP);
5445
5446 format %{ %}
5447 interface(REG_INTER);
5448 %}
5449
5450 operand rsi_RegP()
5451 %{
5452 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5453 match(RegP);
5454 match(rRegP);
5455
5456 format %{ %}
5457 interface(REG_INTER);
5458 %}
5459
5460 operand rbp_RegP()
5461 %{
5462 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5463 match(RegP);
5464 match(rRegP);
5465
5466 format %{ %}
5467 interface(REG_INTER);
5468 %}
5469
5470 // Used in rep stosq
5471 operand rdi_RegP()
5472 %{
5473 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5474 match(RegP);
5475 match(rRegP);
5476
5477 format %{ %}
5478 interface(REG_INTER);
5479 %}
5480
5481 operand r15_RegP()
5482 %{
5483 constraint(ALLOC_IN_RC(ptr_r15_reg));
5484 match(RegP);
5485 match(rRegP);
5486
5487 format %{ %}
5488 interface(REG_INTER);
5489 %}
5490
5491 operand rRegL()
5492 %{
5493 constraint(ALLOC_IN_RC(long_reg));
5494 match(RegL);
5495 match(rax_RegL);
5496 match(rdx_RegL);
5497
5498 format %{ %}
5499 interface(REG_INTER);
5500 %}
5501
5502 // Special Registers
5503 operand no_rax_rdx_RegL()
5504 %{
5505 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5506 match(RegL);
5507 match(rRegL);
5508
5509 format %{ %}
5510 interface(REG_INTER);
5511 %}
5512
5513 operand rax_RegL()
5514 %{
5515 constraint(ALLOC_IN_RC(long_rax_reg));
5516 match(RegL);
5517 match(rRegL);
5518
5519 format %{ "RAX" %}
5520 interface(REG_INTER);
5521 %}
5522
5523 operand rcx_RegL()
5524 %{
5525 constraint(ALLOC_IN_RC(long_rcx_reg));
5526 match(RegL);
5527 match(rRegL);
5528
5529 format %{ %}
5530 interface(REG_INTER);
5531 %}
5532
5533 operand rdx_RegL()
5534 %{
5535 constraint(ALLOC_IN_RC(long_rdx_reg));
5536 match(RegL);
5537 match(rRegL);
5538
5539 format %{ %}
5540 interface(REG_INTER);
5541 %}
5542
5543 operand r11_RegL()
5544 %{
5545 constraint(ALLOC_IN_RC(long_r11_reg));
5546 match(RegL);
5547 match(rRegL);
5548
5549 format %{ %}
5550 interface(REG_INTER);
5551 %}
5552
5553 operand no_rbp_r13_RegL()
5554 %{
5555 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5556 match(RegL);
5557 match(rRegL);
5558 match(rax_RegL);
5559 match(rcx_RegL);
5560 match(rdx_RegL);
5561
5562 format %{ %}
5563 interface(REG_INTER);
5564 %}
5565
5566 // Flags register, used as output of compare instructions
5567 operand rFlagsReg()
5568 %{
5569 constraint(ALLOC_IN_RC(int_flags));
5570 match(RegFlags);
5571
5572 format %{ "RFLAGS" %}
5573 interface(REG_INTER);
5574 %}
5575
5576 // Flags register, used as output of FLOATING POINT compare instructions
5577 operand rFlagsRegU()
5578 %{
5579 constraint(ALLOC_IN_RC(int_flags));
5580 match(RegFlags);
5581
5582 format %{ "RFLAGS_U" %}
5583 interface(REG_INTER);
5584 %}
5585
5586 operand rFlagsRegUCF() %{
5587 constraint(ALLOC_IN_RC(int_flags));
5588 match(RegFlags);
5589 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5590
5591 format %{ "RFLAGS_U_CF" %}
5592 interface(REG_INTER);
5593 %}
5594
5595 operand rFlagsRegUCFE() %{
5596 constraint(ALLOC_IN_RC(int_flags));
5597 match(RegFlags);
5598 predicate(UseAPX && VM_Version::supports_avx10_2());
5599
5600 format %{ "RFLAGS_U_CFE" %}
5601 interface(REG_INTER);
5602 %}
5603
5604 // Float register operands
5605 operand regF() %{
5606 constraint(ALLOC_IN_RC(float_reg));
5607 match(RegF);
5608
5609 format %{ %}
5610 interface(REG_INTER);
5611 %}
5612
5613 // Float register operands
5614 operand legRegF() %{
5615 constraint(ALLOC_IN_RC(float_reg_legacy));
5616 match(RegF);
5617
5618 format %{ %}
5619 interface(REG_INTER);
5620 %}
5621
5622 // Float register operands
5623 operand vlRegF() %{
5624 constraint(ALLOC_IN_RC(float_reg_vl));
5625 match(RegF);
5626
5627 format %{ %}
5628 interface(REG_INTER);
5629 %}
5630
5631 // Double register operands
5632 operand regD() %{
5633 constraint(ALLOC_IN_RC(double_reg));
5634 match(RegD);
5635
5636 format %{ %}
5637 interface(REG_INTER);
5638 %}
5639
5640 // Double register operands
5641 operand legRegD() %{
5642 constraint(ALLOC_IN_RC(double_reg_legacy));
5643 match(RegD);
5644
5645 format %{ %}
5646 interface(REG_INTER);
5647 %}
5648
5649 // Double register operands
5650 operand vlRegD() %{
5651 constraint(ALLOC_IN_RC(double_reg_vl));
5652 match(RegD);
5653
5654 format %{ %}
5655 interface(REG_INTER);
5656 %}
5657
5658 //----------Memory Operands----------------------------------------------------
5659 // Direct Memory Operand
5660 // operand direct(immP addr)
5661 // %{
5662 // match(addr);
5663
5664 // format %{ "[$addr]" %}
5665 // interface(MEMORY_INTER) %{
5666 // base(0xFFFFFFFF);
5667 // index(0x4);
5668 // scale(0x0);
5669 // disp($addr);
5670 // %}
5671 // %}
5672
5673 // Indirect Memory Operand
5674 operand indirect(any_RegP reg)
5675 %{
5676 constraint(ALLOC_IN_RC(ptr_reg));
5677 match(reg);
5678
5679 format %{ "[$reg]" %}
5680 interface(MEMORY_INTER) %{
5681 base($reg);
5682 index(0x4);
5683 scale(0x0);
5684 disp(0x0);
5685 %}
5686 %}
5687
5688 // Indirect Memory Plus Short Offset Operand
5689 operand indOffset8(any_RegP reg, immL8 off)
5690 %{
5691 constraint(ALLOC_IN_RC(ptr_reg));
5692 match(AddP reg off);
5693
5694 format %{ "[$reg + $off (8-bit)]" %}
5695 interface(MEMORY_INTER) %{
5696 base($reg);
5697 index(0x4);
5698 scale(0x0);
5699 disp($off);
5700 %}
5701 %}
5702
5703 // Indirect Memory Plus Long Offset Operand
5704 operand indOffset32(any_RegP reg, immL32 off)
5705 %{
5706 constraint(ALLOC_IN_RC(ptr_reg));
5707 match(AddP reg off);
5708
5709 format %{ "[$reg + $off (32-bit)]" %}
5710 interface(MEMORY_INTER) %{
5711 base($reg);
5712 index(0x4);
5713 scale(0x0);
5714 disp($off);
5715 %}
5716 %}
5717
5718 // Indirect Memory Plus Index Register Plus Offset Operand
5719 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5720 %{
5721 constraint(ALLOC_IN_RC(ptr_reg));
5722 match(AddP (AddP reg lreg) off);
5723
5724 op_cost(10);
5725 format %{"[$reg + $off + $lreg]" %}
5726 interface(MEMORY_INTER) %{
5727 base($reg);
5728 index($lreg);
5729 scale(0x0);
5730 disp($off);
5731 %}
5732 %}
5733
5734 // Indirect Memory Plus Index Register Plus Offset Operand
5735 operand indIndex(any_RegP reg, rRegL lreg)
5736 %{
5737 constraint(ALLOC_IN_RC(ptr_reg));
5738 match(AddP reg lreg);
5739
5740 op_cost(10);
5741 format %{"[$reg + $lreg]" %}
5742 interface(MEMORY_INTER) %{
5743 base($reg);
5744 index($lreg);
5745 scale(0x0);
5746 disp(0x0);
5747 %}
5748 %}
5749
5750 // Indirect Memory Times Scale Plus Index Register
5751 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5752 %{
5753 constraint(ALLOC_IN_RC(ptr_reg));
5754 match(AddP reg (LShiftL lreg scale));
5755
5756 op_cost(10);
5757 format %{"[$reg + $lreg << $scale]" %}
5758 interface(MEMORY_INTER) %{
5759 base($reg);
5760 index($lreg);
5761 scale($scale);
5762 disp(0x0);
5763 %}
5764 %}
5765
5766 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5767 %{
5768 constraint(ALLOC_IN_RC(ptr_reg));
5769 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5770 match(AddP reg (LShiftL (ConvI2L idx) scale));
5771
5772 op_cost(10);
5773 format %{"[$reg + pos $idx << $scale]" %}
5774 interface(MEMORY_INTER) %{
5775 base($reg);
5776 index($idx);
5777 scale($scale);
5778 disp(0x0);
5779 %}
5780 %}
5781
5782 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5783 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5784 %{
5785 constraint(ALLOC_IN_RC(ptr_reg));
5786 match(AddP (AddP reg (LShiftL lreg scale)) off);
5787
5788 op_cost(10);
5789 format %{"[$reg + $off + $lreg << $scale]" %}
5790 interface(MEMORY_INTER) %{
5791 base($reg);
5792 index($lreg);
5793 scale($scale);
5794 disp($off);
5795 %}
5796 %}
5797
5798 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5799 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5800 %{
5801 constraint(ALLOC_IN_RC(ptr_reg));
5802 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5803 match(AddP (AddP reg (ConvI2L idx)) off);
5804
5805 op_cost(10);
5806 format %{"[$reg + $off + $idx]" %}
5807 interface(MEMORY_INTER) %{
5808 base($reg);
5809 index($idx);
5810 scale(0x0);
5811 disp($off);
5812 %}
5813 %}
5814
5815 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5816 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5817 %{
5818 constraint(ALLOC_IN_RC(ptr_reg));
5819 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5820 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5821
5822 op_cost(10);
5823 format %{"[$reg + $off + $idx << $scale]" %}
5824 interface(MEMORY_INTER) %{
5825 base($reg);
5826 index($idx);
5827 scale($scale);
5828 disp($off);
5829 %}
5830 %}
5831
5832 // Indirect Narrow Oop Operand
5833 operand indCompressedOop(rRegN reg) %{
5834 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5835 constraint(ALLOC_IN_RC(ptr_reg));
5836 match(DecodeN reg);
5837
5838 op_cost(10);
5839 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5840 interface(MEMORY_INTER) %{
5841 base(0xc); // R12
5842 index($reg);
5843 scale(0x3);
5844 disp(0x0);
5845 %}
5846 %}
5847
5848 // Indirect Narrow Oop Plus Offset Operand
5849 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
5850 // so we can't free r12 even with CompressedOops::base() == nullptr.
5851 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5852 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5853 constraint(ALLOC_IN_RC(ptr_reg));
5854 match(AddP (DecodeN reg) off);
5855
5856 op_cost(10);
5857 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5858 interface(MEMORY_INTER) %{
5859 base(0xc); // R12
5860 index($reg);
5861 scale(0x3);
5862 disp($off);
5863 %}
5864 %}
5865
5866 // Indirect Memory Operand
5867 operand indirectNarrow(rRegN reg)
5868 %{
5869 predicate(CompressedOops::shift() == 0);
5870 constraint(ALLOC_IN_RC(ptr_reg));
5871 match(DecodeN reg);
5872
5873 format %{ "[$reg]" %}
5874 interface(MEMORY_INTER) %{
5875 base($reg);
5876 index(0x4);
5877 scale(0x0);
5878 disp(0x0);
5879 %}
5880 %}
5881
5882 // Indirect Memory Plus Short Offset Operand
5883 operand indOffset8Narrow(rRegN reg, immL8 off)
5884 %{
5885 predicate(CompressedOops::shift() == 0);
5886 constraint(ALLOC_IN_RC(ptr_reg));
5887 match(AddP (DecodeN reg) off);
5888
5889 format %{ "[$reg + $off (8-bit)]" %}
5890 interface(MEMORY_INTER) %{
5891 base($reg);
5892 index(0x4);
5893 scale(0x0);
5894 disp($off);
5895 %}
5896 %}
5897
5898 // Indirect Memory Plus Long Offset Operand
5899 operand indOffset32Narrow(rRegN reg, immL32 off)
5900 %{
5901 predicate(CompressedOops::shift() == 0);
5902 constraint(ALLOC_IN_RC(ptr_reg));
5903 match(AddP (DecodeN reg) off);
5904
5905 format %{ "[$reg + $off (32-bit)]" %}
5906 interface(MEMORY_INTER) %{
5907 base($reg);
5908 index(0x4);
5909 scale(0x0);
5910 disp($off);
5911 %}
5912 %}
5913
5914 // Indirect Memory Plus Index Register Plus Offset Operand
5915 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5916 %{
5917 predicate(CompressedOops::shift() == 0);
5918 constraint(ALLOC_IN_RC(ptr_reg));
5919 match(AddP (AddP (DecodeN reg) lreg) off);
5920
5921 op_cost(10);
5922 format %{"[$reg + $off + $lreg]" %}
5923 interface(MEMORY_INTER) %{
5924 base($reg);
5925 index($lreg);
5926 scale(0x0);
5927 disp($off);
5928 %}
5929 %}
5930
5931 // Indirect Memory Plus Index Register Plus Offset Operand
5932 operand indIndexNarrow(rRegN reg, rRegL lreg)
5933 %{
5934 predicate(CompressedOops::shift() == 0);
5935 constraint(ALLOC_IN_RC(ptr_reg));
5936 match(AddP (DecodeN reg) lreg);
5937
5938 op_cost(10);
5939 format %{"[$reg + $lreg]" %}
5940 interface(MEMORY_INTER) %{
5941 base($reg);
5942 index($lreg);
5943 scale(0x0);
5944 disp(0x0);
5945 %}
5946 %}
5947
5948 // Indirect Memory Times Scale Plus Index Register
5949 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5950 %{
5951 predicate(CompressedOops::shift() == 0);
5952 constraint(ALLOC_IN_RC(ptr_reg));
5953 match(AddP (DecodeN reg) (LShiftL lreg scale));
5954
5955 op_cost(10);
5956 format %{"[$reg + $lreg << $scale]" %}
5957 interface(MEMORY_INTER) %{
5958 base($reg);
5959 index($lreg);
5960 scale($scale);
5961 disp(0x0);
5962 %}
5963 %}
5964
5965 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5966 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5967 %{
5968 predicate(CompressedOops::shift() == 0);
5969 constraint(ALLOC_IN_RC(ptr_reg));
5970 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5971
5972 op_cost(10);
5973 format %{"[$reg + $off + $lreg << $scale]" %}
5974 interface(MEMORY_INTER) %{
5975 base($reg);
5976 index($lreg);
5977 scale($scale);
5978 disp($off);
5979 %}
5980 %}
5981
5982 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5983 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5984 %{
5985 constraint(ALLOC_IN_RC(ptr_reg));
5986 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5987 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5988
5989 op_cost(10);
5990 format %{"[$reg + $off + $idx]" %}
5991 interface(MEMORY_INTER) %{
5992 base($reg);
5993 index($idx);
5994 scale(0x0);
5995 disp($off);
5996 %}
5997 %}
5998
5999 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
6000 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
6001 %{
6002 constraint(ALLOC_IN_RC(ptr_reg));
6003 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
6004 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
6005
6006 op_cost(10);
6007 format %{"[$reg + $off + $idx << $scale]" %}
6008 interface(MEMORY_INTER) %{
6009 base($reg);
6010 index($idx);
6011 scale($scale);
6012 disp($off);
6013 %}
6014 %}
6015
6016 //----------Special Memory Operands--------------------------------------------
6017 // Stack Slot Operand - This operand is used for loading and storing temporary
6018 // values on the stack where a match requires a value to
6019 // flow through memory.
6020 operand stackSlotP(sRegP reg)
6021 %{
6022 constraint(ALLOC_IN_RC(stack_slots));
6023 // No match rule because this operand is only generated in matching
6024
6025 format %{ "[$reg]" %}
6026 interface(MEMORY_INTER) %{
6027 base(0x4); // RSP
6028 index(0x4); // No Index
6029 scale(0x0); // No Scale
6030 disp($reg); // Stack Offset
6031 %}
6032 %}
6033
6034 operand stackSlotI(sRegI reg)
6035 %{
6036 constraint(ALLOC_IN_RC(stack_slots));
6037 // No match rule because this operand is only generated in matching
6038
6039 format %{ "[$reg]" %}
6040 interface(MEMORY_INTER) %{
6041 base(0x4); // RSP
6042 index(0x4); // No Index
6043 scale(0x0); // No Scale
6044 disp($reg); // Stack Offset
6045 %}
6046 %}
6047
6048 operand stackSlotF(sRegF reg)
6049 %{
6050 constraint(ALLOC_IN_RC(stack_slots));
6051 // No match rule because this operand is only generated in matching
6052
6053 format %{ "[$reg]" %}
6054 interface(MEMORY_INTER) %{
6055 base(0x4); // RSP
6056 index(0x4); // No Index
6057 scale(0x0); // No Scale
6058 disp($reg); // Stack Offset
6059 %}
6060 %}
6061
6062 operand stackSlotD(sRegD reg)
6063 %{
6064 constraint(ALLOC_IN_RC(stack_slots));
6065 // No match rule because this operand is only generated in matching
6066
6067 format %{ "[$reg]" %}
6068 interface(MEMORY_INTER) %{
6069 base(0x4); // RSP
6070 index(0x4); // No Index
6071 scale(0x0); // No Scale
6072 disp($reg); // Stack Offset
6073 %}
6074 %}
6075 operand stackSlotL(sRegL reg)
6076 %{
6077 constraint(ALLOC_IN_RC(stack_slots));
6078 // No match rule because this operand is only generated in matching
6079
6080 format %{ "[$reg]" %}
6081 interface(MEMORY_INTER) %{
6082 base(0x4); // RSP
6083 index(0x4); // No Index
6084 scale(0x0); // No Scale
6085 disp($reg); // Stack Offset
6086 %}
6087 %}
6088
6089 //----------Conditional Branch Operands----------------------------------------
6090 // Comparison Op - This is the operation of the comparison, and is limited to
6091 // the following set of codes:
6092 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6093 //
6094 // Other attributes of the comparison, such as unsignedness, are specified
6095 // by the comparison instruction that sets a condition code flags register.
6096 // That result is represented by a flags operand whose subtype is appropriate
6097 // to the unsignedness (etc.) of the comparison.
6098 //
6099 // Later, the instruction which matches both the Comparison Op (a Bool) and
6100 // the flags (produced by the Cmp) specifies the coding of the comparison op
6101 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6102
6103 // Comparison Code
6104 operand cmpOp()
6105 %{
6106 match(Bool);
6107
6108 format %{ "" %}
6109 interface(COND_INTER) %{
6110 equal(0x4, "e");
6111 not_equal(0x5, "ne");
6112 less(0xc, "l");
6113 greater_equal(0xd, "ge");
6114 less_equal(0xe, "le");
6115 greater(0xf, "g");
6116 overflow(0x0, "o");
6117 no_overflow(0x1, "no");
6118 %}
6119 %}
6120
6121 // Comparison Code, unsigned compare. Used by FP also, with
6122 // C2 (unordered) turned into GT or LT already. The other bits
6123 // C0 and C3 are turned into Carry & Zero flags.
6124 operand cmpOpU()
6125 %{
6126 match(Bool);
6127
6128 format %{ "" %}
6129 interface(COND_INTER) %{
6130 equal(0x4, "e");
6131 not_equal(0x5, "ne");
6132 less(0x2, "b");
6133 greater_equal(0x3, "ae");
6134 less_equal(0x6, "be");
6135 greater(0x7, "a");
6136 overflow(0x0, "o");
6137 no_overflow(0x1, "no");
6138 %}
6139 %}
6140
6141
6142 // Floating comparisons that don't require any fixup for the unordered case.
6143 // If both inputs of the comparison are the same, ZF is always set, so we
6144 // don't need to use cmpOpUCF2 for eq/ne.
6145 operand cmpOpUCF() %{
6146 match(Bool);
6147 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6148 (n->as_Bool()->_test._test == BoolTest::lt ||
6149 n->as_Bool()->_test._test == BoolTest::ge ||
6150 n->as_Bool()->_test._test == BoolTest::le ||
6151 n->as_Bool()->_test._test == BoolTest::gt ||
6152 n->in(1)->in(1) == n->in(1)->in(2)));
6153 format %{ "" %}
6154 interface(COND_INTER) %{
6155 equal(0xb, "np");
6156 not_equal(0xa, "p");
6157 less(0x2, "b");
6158 greater_equal(0x3, "ae");
6159 less_equal(0x6, "be");
6160 greater(0x7, "a");
6161 overflow(0x0, "o");
6162 no_overflow(0x1, "no");
6163 %}
6164 %}
6165
6166
6167 // Floating comparisons that can be fixed up with extra conditional jumps
6168 operand cmpOpUCF2() %{
6169 match(Bool);
6170 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6171 (n->as_Bool()->_test._test == BoolTest::ne ||
6172 n->as_Bool()->_test._test == BoolTest::eq) &&
6173 n->in(1)->in(1) != n->in(1)->in(2));
6174 format %{ "" %}
6175 interface(COND_INTER) %{
6176 equal(0x4, "e");
6177 not_equal(0x5, "ne");
6178 less(0x2, "b");
6179 greater_equal(0x3, "ae");
6180 less_equal(0x6, "be");
6181 greater(0x7, "a");
6182 overflow(0x0, "o");
6183 no_overflow(0x1, "no");
6184 %}
6185 %}
6186
6187
6188 // Floating point comparisons that set condition flags to test more directly.
6189 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6190 // are used for L (<) and LE (<=) conditions. It's important to convert these
6191 // latter conditions to ones that use unsigned tests before passing into an
6192 // instruction because the preceding comparison might be based on a three way
6193 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6194 operand cmpOpUCFE()
6195 %{
6196 match(Bool);
6197 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6198 (n->as_Bool()->_test._test == BoolTest::ne ||
6199 n->as_Bool()->_test._test == BoolTest::eq ||
6200 n->as_Bool()->_test._test == BoolTest::lt ||
6201 n->as_Bool()->_test._test == BoolTest::ge ||
6202 n->as_Bool()->_test._test == BoolTest::le ||
6203 n->as_Bool()->_test._test == BoolTest::gt));
6204
6205 format %{ "" %}
6206 interface(COND_INTER) %{
6207 equal(0x4, "e");
6208 not_equal(0x5, "ne");
6209 less(0x2, "b");
6210 greater_equal(0x3, "ae");
6211 less_equal(0x6, "be");
6212 greater(0x7, "a");
6213 overflow(0x0, "o");
6214 no_overflow(0x1, "no");
6215 %}
6216 %}
6217
6218 // Operands for bound floating pointer register arguments
6219 operand rxmm0() %{
6220 constraint(ALLOC_IN_RC(xmm0_reg));
6221 match(VecX);
6222 format%{%}
6223 interface(REG_INTER);
6224 %}
6225
6226 // Vectors
6227
6228 // Dummy generic vector class. Should be used for all vector operands.
6229 // Replaced with vec[SDXYZ] during post-selection pass.
6230 operand vec() %{
6231 constraint(ALLOC_IN_RC(dynamic));
6232 match(VecX);
6233 match(VecY);
6234 match(VecZ);
6235 match(VecS);
6236 match(VecD);
6237
6238 format %{ %}
6239 interface(REG_INTER);
6240 %}
6241
6242 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6243 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6244 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6245 // runtime code generation via reg_class_dynamic.
6246 operand legVec() %{
6247 constraint(ALLOC_IN_RC(dynamic));
6248 match(VecX);
6249 match(VecY);
6250 match(VecZ);
6251 match(VecS);
6252 match(VecD);
6253
6254 format %{ %}
6255 interface(REG_INTER);
6256 %}
6257
6258 // Replaces vec during post-selection cleanup. See above.
6259 operand vecS() %{
6260 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6261 match(VecS);
6262
6263 format %{ %}
6264 interface(REG_INTER);
6265 %}
6266
6267 // Replaces legVec during post-selection cleanup. See above.
6268 operand legVecS() %{
6269 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6270 match(VecS);
6271
6272 format %{ %}
6273 interface(REG_INTER);
6274 %}
6275
6276 // Replaces vec during post-selection cleanup. See above.
6277 operand vecD() %{
6278 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6279 match(VecD);
6280
6281 format %{ %}
6282 interface(REG_INTER);
6283 %}
6284
6285 // Replaces legVec during post-selection cleanup. See above.
6286 operand legVecD() %{
6287 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6288 match(VecD);
6289
6290 format %{ %}
6291 interface(REG_INTER);
6292 %}
6293
6294 // Replaces vec during post-selection cleanup. See above.
6295 operand vecX() %{
6296 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6297 match(VecX);
6298
6299 format %{ %}
6300 interface(REG_INTER);
6301 %}
6302
6303 // Replaces legVec during post-selection cleanup. See above.
6304 operand legVecX() %{
6305 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6306 match(VecX);
6307
6308 format %{ %}
6309 interface(REG_INTER);
6310 %}
6311
6312 // Replaces vec during post-selection cleanup. See above.
6313 operand vecY() %{
6314 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6315 match(VecY);
6316
6317 format %{ %}
6318 interface(REG_INTER);
6319 %}
6320
6321 // Replaces legVec during post-selection cleanup. See above.
6322 operand legVecY() %{
6323 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6324 match(VecY);
6325
6326 format %{ %}
6327 interface(REG_INTER);
6328 %}
6329
6330 // Replaces vec during post-selection cleanup. See above.
6331 operand vecZ() %{
6332 constraint(ALLOC_IN_RC(vectorz_reg));
6333 match(VecZ);
6334
6335 format %{ %}
6336 interface(REG_INTER);
6337 %}
6338
6339 // Replaces legVec during post-selection cleanup. See above.
6340 operand legVecZ() %{
6341 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6342 match(VecZ);
6343
6344 format %{ %}
6345 interface(REG_INTER);
6346 %}
6347
6348 //----------OPERAND CLASSES----------------------------------------------------
6349 // Operand Classes are groups of operands that are used to simplify
6350 // instruction definitions by not requiring the AD writer to specify separate
6351 // instructions for every form of operand when the instruction accepts
6352 // multiple operand types with the same basic encoding and format. The classic
6353 // case of this is memory operands.
6354
6355 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6356 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6357 indCompressedOop, indCompressedOopOffset,
6358 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6359 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6360 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6361
6362 //----------PIPELINE-----------------------------------------------------------
6363 // Rules which define the behavior of the target architectures pipeline.
6364 pipeline %{
6365
6366 //----------ATTRIBUTES---------------------------------------------------------
6367 attributes %{
6368 variable_size_instructions; // Variable size instructions
6369 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6370 instruction_unit_size = 1; // An instruction is 1 byte long
6371 instruction_fetch_unit_size = 16; // The processor fetches one line
6372 instruction_fetch_units = 1; // of 16 bytes
6373 %}
6374
6375 //----------RESOURCES----------------------------------------------------------
6376 // Resources are the functional units available to the machine
6377
6378 // Generic P2/P3 pipeline
6379 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6380 // 3 instructions decoded per cycle.
6381 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6382 // 3 ALU ops, only ALU0 handles mul instructions.
6383 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6384 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6385 BR, FPU,
6386 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6387
6388 //----------PIPELINE DESCRIPTION-----------------------------------------------
6389 // Pipeline Description specifies the stages in the machine's pipeline
6390
6391 // Generic P2/P3 pipeline
6392 pipe_desc(S0, S1, S2, S3, S4, S5);
6393
6394 //----------PIPELINE CLASSES---------------------------------------------------
6395 // Pipeline Classes describe the stages in which input and output are
6396 // referenced by the hardware pipeline.
6397
6398 // Naming convention: ialu or fpu
6399 // Then: _reg
6400 // Then: _reg if there is a 2nd register
6401 // Then: _long if it's a pair of instructions implementing a long
6402 // Then: _fat if it requires the big decoder
6403 // Or: _mem if it requires the big decoder and a memory unit.
6404
6405 // Integer ALU reg operation
6406 pipe_class ialu_reg(rRegI dst)
6407 %{
6408 single_instruction;
6409 dst : S4(write);
6410 dst : S3(read);
6411 DECODE : S0; // any decoder
6412 ALU : S3; // any alu
6413 %}
6414
6415 // Long ALU reg operation
6416 pipe_class ialu_reg_long(rRegL dst)
6417 %{
6418 instruction_count(2);
6419 dst : S4(write);
6420 dst : S3(read);
6421 DECODE : S0(2); // any 2 decoders
6422 ALU : S3(2); // both alus
6423 %}
6424
6425 // Integer ALU reg operation using big decoder
6426 pipe_class ialu_reg_fat(rRegI dst)
6427 %{
6428 single_instruction;
6429 dst : S4(write);
6430 dst : S3(read);
6431 D0 : S0; // big decoder only
6432 ALU : S3; // any alu
6433 %}
6434
6435 // Integer ALU reg-reg operation
6436 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6437 %{
6438 single_instruction;
6439 dst : S4(write);
6440 src : S3(read);
6441 DECODE : S0; // any decoder
6442 ALU : S3; // any alu
6443 %}
6444
6445 // Integer ALU reg-reg operation
6446 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6447 %{
6448 single_instruction;
6449 dst : S4(write);
6450 src : S3(read);
6451 D0 : S0; // big decoder only
6452 ALU : S3; // any alu
6453 %}
6454
6455 // Integer ALU reg-mem operation
6456 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6457 %{
6458 single_instruction;
6459 dst : S5(write);
6460 mem : S3(read);
6461 D0 : S0; // big decoder only
6462 ALU : S4; // any alu
6463 MEM : S3; // any mem
6464 %}
6465
6466 // Integer mem operation (prefetch)
6467 pipe_class ialu_mem(memory mem)
6468 %{
6469 single_instruction;
6470 mem : S3(read);
6471 D0 : S0; // big decoder only
6472 MEM : S3; // any mem
6473 %}
6474
6475 // Integer Store to Memory
6476 pipe_class ialu_mem_reg(memory mem, rRegI src)
6477 %{
6478 single_instruction;
6479 mem : S3(read);
6480 src : S5(read);
6481 D0 : S0; // big decoder only
6482 ALU : S4; // any alu
6483 MEM : S3;
6484 %}
6485
6486 // // Long Store to Memory
6487 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6488 // %{
6489 // instruction_count(2);
6490 // mem : S3(read);
6491 // src : S5(read);
6492 // D0 : S0(2); // big decoder only; twice
6493 // ALU : S4(2); // any 2 alus
6494 // MEM : S3(2); // Both mems
6495 // %}
6496
6497 // Integer Store to Memory
6498 pipe_class ialu_mem_imm(memory mem)
6499 %{
6500 single_instruction;
6501 mem : S3(read);
6502 D0 : S0; // big decoder only
6503 ALU : S4; // any alu
6504 MEM : S3;
6505 %}
6506
6507 // Integer ALU0 reg-reg operation
6508 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6509 %{
6510 single_instruction;
6511 dst : S4(write);
6512 src : S3(read);
6513 D0 : S0; // Big decoder only
6514 ALU0 : S3; // only alu0
6515 %}
6516
6517 // Integer ALU0 reg-mem operation
6518 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6519 %{
6520 single_instruction;
6521 dst : S5(write);
6522 mem : S3(read);
6523 D0 : S0; // big decoder only
6524 ALU0 : S4; // ALU0 only
6525 MEM : S3; // any mem
6526 %}
6527
6528 // Integer ALU reg-reg operation
6529 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6530 %{
6531 single_instruction;
6532 cr : S4(write);
6533 src1 : S3(read);
6534 src2 : S3(read);
6535 DECODE : S0; // any decoder
6536 ALU : S3; // any alu
6537 %}
6538
6539 // Integer ALU reg-imm operation
6540 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6541 %{
6542 single_instruction;
6543 cr : S4(write);
6544 src1 : S3(read);
6545 DECODE : S0; // any decoder
6546 ALU : S3; // any alu
6547 %}
6548
6549 // Integer ALU reg-mem operation
6550 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6551 %{
6552 single_instruction;
6553 cr : S4(write);
6554 src1 : S3(read);
6555 src2 : S3(read);
6556 D0 : S0; // big decoder only
6557 ALU : S4; // any alu
6558 MEM : S3;
6559 %}
6560
6561 // Conditional move reg-reg
6562 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6563 %{
6564 instruction_count(4);
6565 y : S4(read);
6566 q : S3(read);
6567 p : S3(read);
6568 DECODE : S0(4); // any decoder
6569 %}
6570
6571 // Conditional move reg-reg
6572 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6573 %{
6574 single_instruction;
6575 dst : S4(write);
6576 src : S3(read);
6577 cr : S3(read);
6578 DECODE : S0; // any decoder
6579 %}
6580
6581 // Conditional move reg-mem
6582 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6583 %{
6584 single_instruction;
6585 dst : S4(write);
6586 src : S3(read);
6587 cr : S3(read);
6588 DECODE : S0; // any decoder
6589 MEM : S3;
6590 %}
6591
6592 // Conditional move reg-reg long
6593 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6594 %{
6595 single_instruction;
6596 dst : S4(write);
6597 src : S3(read);
6598 cr : S3(read);
6599 DECODE : S0(2); // any 2 decoders
6600 %}
6601
// Float reg operation
6603 pipe_class fpu_reg(regD dst)
6604 %{
6605 instruction_count(2);
6606 dst : S3(read);
6607 DECODE : S0(2); // any 2 decoders
6608 FPU : S3;
6609 %}
6610
6611 // Float reg-reg operation
6612 pipe_class fpu_reg_reg(regD dst, regD src)
6613 %{
6614 instruction_count(2);
6615 dst : S4(write);
6616 src : S3(read);
6617 DECODE : S0(2); // any 2 decoders
6618 FPU : S3;
6619 %}
6620
// Float reg-reg-reg operation
6622 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6623 %{
6624 instruction_count(3);
6625 dst : S4(write);
6626 src1 : S3(read);
6627 src2 : S3(read);
6628 DECODE : S0(3); // any 3 decoders
6629 FPU : S3(2);
6630 %}
6631
// Float reg-reg-reg-reg operation
6633 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6634 %{
6635 instruction_count(4);
6636 dst : S4(write);
6637 src1 : S3(read);
6638 src2 : S3(read);
6639 src3 : S3(read);
DECODE : S0(4); // any 4 decoders
6641 FPU : S3(2);
6642 %}
6643
// Float reg-mem-reg-reg operation
6645 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6646 %{
6647 instruction_count(4);
6648 dst : S4(write);
6649 src1 : S3(read);
6650 src2 : S3(read);
6651 src3 : S3(read);
6652 DECODE : S1(3); // any 3 decoders
6653 D0 : S0; // Big decoder only
6654 FPU : S3(2);
6655 MEM : S3;
6656 %}
6657
6658 // Float reg-mem operation
6659 pipe_class fpu_reg_mem(regD dst, memory mem)
6660 %{
6661 instruction_count(2);
6662 dst : S5(write);
6663 mem : S3(read);
6664 D0 : S0; // big decoder only
6665 DECODE : S1; // any decoder for FPU POP
6666 FPU : S4;
6667 MEM : S3; // any mem
6668 %}
6669
// Float reg-reg-mem operation
6671 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6672 %{
6673 instruction_count(3);
6674 dst : S5(write);
6675 src1 : S3(read);
6676 mem : S3(read);
6677 D0 : S0; // big decoder only
6678 DECODE : S1(2); // any decoder for FPU POP
6679 FPU : S4;
6680 MEM : S3; // any mem
6681 %}
6682
6683 // Float mem-reg operation
6684 pipe_class fpu_mem_reg(memory mem, regD src)
6685 %{
6686 instruction_count(2);
6687 src : S5(read);
6688 mem : S3(read);
6689 DECODE : S0; // any decoder for FPU PUSH
6690 D0 : S1; // big decoder only
6691 FPU : S4;
6692 MEM : S3; // any mem
6693 %}
6694
6695 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6696 %{
6697 instruction_count(3);
6698 src1 : S3(read);
6699 src2 : S3(read);
6700 mem : S3(read);
6701 DECODE : S0(2); // any decoder for FPU PUSH
6702 D0 : S1; // big decoder only
6703 FPU : S4;
6704 MEM : S3; // any mem
6705 %}
6706
6707 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6708 %{
6709 instruction_count(3);
6710 src1 : S3(read);
6711 src2 : S3(read);
6712 mem : S4(read);
6713 DECODE : S0; // any decoder for FPU PUSH
6714 D0 : S0(2); // big decoder only
6715 FPU : S4;
6716 MEM : S3(2); // any mem
6717 %}
6718
6719 pipe_class fpu_mem_mem(memory dst, memory src1)
6720 %{
6721 instruction_count(2);
6722 src1 : S3(read);
6723 dst : S4(read);
6724 D0 : S0(2); // big decoder only
6725 MEM : S3(2); // any mem
6726 %}
6727
6728 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6729 %{
6730 instruction_count(3);
6731 src1 : S3(read);
6732 src2 : S3(read);
6733 dst : S4(read);
6734 D0 : S0(3); // big decoder only
6735 FPU : S4;
6736 MEM : S3(3); // any mem
6737 %}
6738
6739 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6740 %{
6741 instruction_count(3);
6742 src1 : S4(read);
6743 mem : S4(read);
6744 DECODE : S0; // any decoder for FPU PUSH
6745 D0 : S0(2); // big decoder only
6746 FPU : S4;
6747 MEM : S3(2); // any mem
6748 %}
6749
6750 // Float load constant
6751 pipe_class fpu_reg_con(regD dst)
6752 %{
6753 instruction_count(2);
6754 dst : S5(write);
6755 D0 : S0; // big decoder only for the load
6756 DECODE : S1; // any decoder for FPU POP
6757 FPU : S4;
6758 MEM : S3; // any mem
6759 %}
6760
6761 // Float load constant
6762 pipe_class fpu_reg_reg_con(regD dst, regD src)
6763 %{
6764 instruction_count(3);
6765 dst : S5(write);
6766 src : S3(read);
6767 D0 : S0; // big decoder only for the load
6768 DECODE : S1(2); // any decoder for FPU POP
6769 FPU : S4;
6770 MEM : S3; // any mem
6771 %}
6772
// Unconditional branch
6774 pipe_class pipe_jmp(label labl)
6775 %{
6776 single_instruction;
6777 BR : S3;
6778 %}
6779
6780 // Conditional branch
6781 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6782 %{
6783 single_instruction;
6784 cr : S1(read);
6785 BR : S3;
6786 %}
6787
6788 // Allocation idiom
6789 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6790 %{
6791 instruction_count(1); force_serialization;
6792 fixed_latency(6);
6793 heap_ptr : S3(read);
6794 DECODE : S0(3);
6795 D0 : S2;
6796 MEM : S3;
6797 ALU : S3(2);
6798 dst : S5(write);
6799 BR : S5;
6800 %}
6801
6802 // Generic big/slow expanded idiom
6803 pipe_class pipe_slow()
6804 %{
6805 instruction_count(10); multiple_bundles; force_serialization;
6806 fixed_latency(100);
6807 D0 : S0(2);
6808 MEM : S3(2);
6809 %}
6810
6811 // The real do-nothing guy
6812 pipe_class empty()
6813 %{
6814 instruction_count(0);
6815 %}
6816
6817 // Define the class for the Nop node
6818 define
6819 %{
6820 MachNop = empty;
6821 %}
6822
6823 %}
6824
6825 //----------INSTRUCTIONS-------------------------------------------------------
6826 //
6827 // match -- States which machine-independent subtree may be replaced
6828 // by this instruction.
6829 // ins_cost -- The estimated cost of this instruction is used by instruction
6830 // selection to identify a minimum cost tree of machine
6831 // instructions that matches a tree of machine-independent
6832 // instructions.
6833 // format -- A string providing the disassembly for this instruction.
6834 // The value of an instruction's operand may be inserted
6835 // by referring to it with a '$' prefix.
6836 // opcode -- Three instruction opcodes may be provided. These are referred
6837 // to within an encode class as $primary, $secondary, and $tertiary
respectively. The primary opcode is commonly used to
6839 // indicate the type of machine instruction, while secondary
6840 // and tertiary are often used for prefix options or addressing
6841 // modes.
6842 // ins_encode -- A list of encode classes with parameters. The encode class
6843 // name must have been defined in an 'enc_class' specification
6844 // in the encode section of the architecture description.
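//
// As a purely illustrative sketch (kept in a comment so it is not processed,
// and using a hypothetical name), a minimal instruct that combines these
// pieces with the inline ins_encode form and a pipeline class from above
// could look like:
//
//   instruct addI_rReg_sketch(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));  // replaces the ideal AddI subtree
//     effect(KILL cr);                // the addition clobbers the flags
//     ins_cost(150);                  // relative cost used by selection
//     format %{ "addl $dst, $src\t# int" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}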
6845
6846 // ============================================================================
6847
6848 instruct ShouldNotReachHere() %{
6849 match(Halt);
6850 format %{ "stop\t# ShouldNotReachHere" %}
6851 ins_encode %{
6852 if (is_reachable()) {
6853 const char* str = __ code_string(_halt_reason);
6854 __ stop(str);
6855 }
6856 %}
6857 ins_pipe(pipe_slow);
6858 %}
6859
6860 // ============================================================================
6861
6862 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6863 // Load Float
6864 instruct MoveF2VL(vlRegF dst, regF src) %{
6865 match(Set dst src);
6866 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6867 ins_encode %{
6868 ShouldNotReachHere();
6869 %}
6870 ins_pipe( fpu_reg_reg );
6871 %}
6872
6873 // Load Float
6874 instruct MoveF2LEG(legRegF dst, regF src) %{
6875 match(Set dst src);
6876 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6877 ins_encode %{
6878 ShouldNotReachHere();
6879 %}
6880 ins_pipe( fpu_reg_reg );
6881 %}
6882
6883 // Load Float
6884 instruct MoveVL2F(regF dst, vlRegF src) %{
6885 match(Set dst src);
6886 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6887 ins_encode %{
6888 ShouldNotReachHere();
6889 %}
6890 ins_pipe( fpu_reg_reg );
6891 %}
6892
6893 // Load Float
6894 instruct MoveLEG2F(regF dst, legRegF src) %{
6895 match(Set dst src);
6896 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6897 ins_encode %{
6898 ShouldNotReachHere();
6899 %}
6900 ins_pipe( fpu_reg_reg );
6901 %}
6902
6903 // Load Double
6904 instruct MoveD2VL(vlRegD dst, regD src) %{
6905 match(Set dst src);
6906 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6907 ins_encode %{
6908 ShouldNotReachHere();
6909 %}
6910 ins_pipe( fpu_reg_reg );
6911 %}
6912
6913 // Load Double
6914 instruct MoveD2LEG(legRegD dst, regD src) %{
6915 match(Set dst src);
6916 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6917 ins_encode %{
6918 ShouldNotReachHere();
6919 %}
6920 ins_pipe( fpu_reg_reg );
6921 %}
6922
6923 // Load Double
6924 instruct MoveVL2D(regD dst, vlRegD src) %{
6925 match(Set dst src);
6926 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6927 ins_encode %{
6928 ShouldNotReachHere();
6929 %}
6930 ins_pipe( fpu_reg_reg );
6931 %}
6932
6933 // Load Double
6934 instruct MoveLEG2D(regD dst, legRegD src) %{
6935 match(Set dst src);
6936 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6937 ins_encode %{
6938 ShouldNotReachHere();
6939 %}
6940 ins_pipe( fpu_reg_reg );
6941 %}
6942
6943 //----------Load/Store/Move Instructions---------------------------------------
6944 //----------Load Instructions--------------------------------------------------
6945
6946 // Load Byte (8 bit signed)
6947 instruct loadB(rRegI dst, memory mem)
6948 %{
6949 match(Set dst (LoadB mem));
6950
6951 ins_cost(125);
6952 format %{ "movsbl $dst, $mem\t# byte" %}
6953
6954 ins_encode %{
6955 __ movsbl($dst$$Register, $mem$$Address);
6956 %}
6957
6958 ins_pipe(ialu_reg_mem);
6959 %}
6960
6961 // Load Byte (8 bit signed) into Long Register
6962 instruct loadB2L(rRegL dst, memory mem)
6963 %{
6964 match(Set dst (ConvI2L (LoadB mem)));
6965
6966 ins_cost(125);
6967 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6968
6969 ins_encode %{
6970 __ movsbq($dst$$Register, $mem$$Address);
6971 %}
6972
6973 ins_pipe(ialu_reg_mem);
6974 %}
6975
6976 // Load Unsigned Byte (8 bit UNsigned)
6977 instruct loadUB(rRegI dst, memory mem)
6978 %{
6979 match(Set dst (LoadUB mem));
6980
6981 ins_cost(125);
6982 format %{ "movzbl $dst, $mem\t# ubyte" %}
6983
6984 ins_encode %{
6985 __ movzbl($dst$$Register, $mem$$Address);
6986 %}
6987
6988 ins_pipe(ialu_reg_mem);
6989 %}
6990
6991 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6992 instruct loadUB2L(rRegL dst, memory mem)
6993 %{
6994 match(Set dst (ConvI2L (LoadUB mem)));
6995
6996 ins_cost(125);
6997 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6998
6999 ins_encode %{
7000 __ movzbq($dst$$Register, $mem$$Address);
7001 %}
7002
7003 ins_pipe(ialu_reg_mem);
7004 %}
7005
7006 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
7007 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7008 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
7009 effect(KILL cr);
7010
7011 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
7012 "andl $dst, right_n_bits($mask, 8)" %}
7013 ins_encode %{
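// movzbq already zero-extends the loaded byte, so only the low 8 bits of the
// mask are significant; the immediate is clamped with right_n_bits(8) below.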
7014 Register Rdst = $dst$$Register;
7015 __ movzbq(Rdst, $mem$$Address);
7016 __ andl(Rdst, $mask$$constant & right_n_bits(8));
7017 %}
7018 ins_pipe(ialu_reg_mem);
7019 %}
7020
7021 // Load Short (16 bit signed)
7022 instruct loadS(rRegI dst, memory mem)
7023 %{
7024 match(Set dst (LoadS mem));
7025
7026 ins_cost(125);
7027 format %{ "movswl $dst, $mem\t# short" %}
7028
7029 ins_encode %{
7030 __ movswl($dst$$Register, $mem$$Address);
7031 %}
7032
7033 ins_pipe(ialu_reg_mem);
7034 %}
7035
7036 // Load Short (16 bit signed) to Byte (8 bit signed)
7037 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7038 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
7039
7040 ins_cost(125);
7041 format %{ "movsbl $dst, $mem\t# short -> byte" %}
7042 ins_encode %{
7043 __ movsbl($dst$$Register, $mem$$Address);
7044 %}
7045 ins_pipe(ialu_reg_mem);
7046 %}
7047
7048 // Load Short (16 bit signed) into Long Register
7049 instruct loadS2L(rRegL dst, memory mem)
7050 %{
7051 match(Set dst (ConvI2L (LoadS mem)));
7052
7053 ins_cost(125);
7054 format %{ "movswq $dst, $mem\t# short -> long" %}
7055
7056 ins_encode %{
7057 __ movswq($dst$$Register, $mem$$Address);
7058 %}
7059
7060 ins_pipe(ialu_reg_mem);
7061 %}
7062
7063 // Load Unsigned Short/Char (16 bit UNsigned)
7064 instruct loadUS(rRegI dst, memory mem)
7065 %{
7066 match(Set dst (LoadUS mem));
7067
7068 ins_cost(125);
7069 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7070
7071 ins_encode %{
7072 __ movzwl($dst$$Register, $mem$$Address);
7073 %}
7074
7075 ins_pipe(ialu_reg_mem);
7076 %}
7077
7078 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7079 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7080 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7081
7082 ins_cost(125);
7083 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7084 ins_encode %{
7085 __ movsbl($dst$$Register, $mem$$Address);
7086 %}
7087 ins_pipe(ialu_reg_mem);
7088 %}
7089
7090 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7091 instruct loadUS2L(rRegL dst, memory mem)
7092 %{
7093 match(Set dst (ConvI2L (LoadUS mem)));
7094
7095 ins_cost(125);
7096 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7097
7098 ins_encode %{
7099 __ movzwq($dst$$Register, $mem$$Address);
7100 %}
7101
7102 ins_pipe(ialu_reg_mem);
7103 %}
7104
7105 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7106 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7107 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7108
7109 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7110 ins_encode %{
7111 __ movzbq($dst$$Register, $mem$$Address);
7112 %}
7113 ins_pipe(ialu_reg_mem);
7114 %}
7115
7116 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7117 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7118 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7119 effect(KILL cr);
7120
7121 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7122 "andl $dst, right_n_bits($mask, 16)" %}
7123 ins_encode %{
7124 Register Rdst = $dst$$Register;
7125 __ movzwq(Rdst, $mem$$Address);
7126 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7127 %}
7128 ins_pipe(ialu_reg_mem);
7129 %}
7130
7131 // Load Integer
7132 instruct loadI(rRegI dst, memory mem)
7133 %{
7134 match(Set dst (LoadI mem));
7135
7136 ins_cost(125);
7137 format %{ "movl $dst, $mem\t# int" %}
7138
7139 ins_encode %{
7140 __ movl($dst$$Register, $mem$$Address);
7141 %}
7142
7143 ins_pipe(ialu_reg_mem);
7144 %}
7145
7146 // Load Integer (32 bit signed) to Byte (8 bit signed)
7147 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7148 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7149
7150 ins_cost(125);
7151 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7152 ins_encode %{
7153 __ movsbl($dst$$Register, $mem$$Address);
7154 %}
7155 ins_pipe(ialu_reg_mem);
7156 %}
7157
7158 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7159 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7160 match(Set dst (AndI (LoadI mem) mask));
7161
7162 ins_cost(125);
7163 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7164 ins_encode %{
7165 __ movzbl($dst$$Register, $mem$$Address);
7166 %}
7167 ins_pipe(ialu_reg_mem);
7168 %}
7169
7170 // Load Integer (32 bit signed) to Short (16 bit signed)
7171 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7172 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7173
7174 ins_cost(125);
7175 format %{ "movswl $dst, $mem\t# int -> short" %}
7176 ins_encode %{
7177 __ movswl($dst$$Register, $mem$$Address);
7178 %}
7179 ins_pipe(ialu_reg_mem);
7180 %}
7181
7182 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7183 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7184 match(Set dst (AndI (LoadI mem) mask));
7185
7186 ins_cost(125);
7187 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7188 ins_encode %{
7189 __ movzwl($dst$$Register, $mem$$Address);
7190 %}
7191 ins_pipe(ialu_reg_mem);
7192 %}
7193
7194 // Load Integer into Long Register
7195 instruct loadI2L(rRegL dst, memory mem)
7196 %{
7197 match(Set dst (ConvI2L (LoadI mem)));
7198
7199 ins_cost(125);
7200 format %{ "movslq $dst, $mem\t# int -> long" %}
7201
7202 ins_encode %{
7203 __ movslq($dst$$Register, $mem$$Address);
7204 %}
7205
7206 ins_pipe(ialu_reg_mem);
7207 %}
7208
7209 // Load Integer with mask 0xFF into Long Register
7210 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7211 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7212
7213 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7214 ins_encode %{
7215 __ movzbq($dst$$Register, $mem$$Address);
7216 %}
7217 ins_pipe(ialu_reg_mem);
7218 %}
7219
7220 // Load Integer with mask 0xFFFF into Long Register
7221 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7222 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7223
7224 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7225 ins_encode %{
7226 __ movzwq($dst$$Register, $mem$$Address);
7227 %}
7228 ins_pipe(ialu_reg_mem);
7229 %}
7230
7231 // Load Integer with a 31-bit mask into Long Register
7232 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7233 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7234 effect(KILL cr);
7235
7236 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7237 "andl $dst, $mask" %}
7238 ins_encode %{
7239 Register Rdst = $dst$$Register;
7240 __ movl(Rdst, $mem$$Address);
7241 __ andl(Rdst, $mask$$constant);
7242 %}
7243 ins_pipe(ialu_reg_mem);
7244 %}
7245
7246 // Load Unsigned Integer into Long Register
7247 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7248 %{
7249 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7250
7251 ins_cost(125);
7252 format %{ "movl $dst, $mem\t# uint -> long" %}
7253
7254 ins_encode %{
7255 __ movl($dst$$Register, $mem$$Address);
7256 %}
7257
7258 ins_pipe(ialu_reg_mem);
7259 %}
7260
7261 // Load Long
7262 instruct loadL(rRegL dst, memory mem)
7263 %{
7264 match(Set dst (LoadL mem));
7265
7266 ins_cost(125);
7267 format %{ "movq $dst, $mem\t# long" %}
7268
7269 ins_encode %{
7270 __ movq($dst$$Register, $mem$$Address);
7271 %}
7272
7273 ins_pipe(ialu_reg_mem); // XXX
7274 %}
7275
7276 // Load Range
7277 instruct loadRange(rRegI dst, memory mem)
7278 %{
7279 match(Set dst (LoadRange mem));
7280
7281 ins_cost(125); // XXX
7282 format %{ "movl $dst, $mem\t# range" %}
7283 ins_encode %{
7284 __ movl($dst$$Register, $mem$$Address);
7285 %}
7286 ins_pipe(ialu_reg_mem);
7287 %}
7288
7289 // Load Pointer
7290 instruct loadP(rRegP dst, memory mem)
7291 %{
7292 match(Set dst (LoadP mem));
7293 predicate(n->as_Load()->barrier_data() == 0);
7294
7295 ins_cost(125); // XXX
7296 format %{ "movq $dst, $mem\t# ptr" %}
7297 ins_encode %{
7298 __ movq($dst$$Register, $mem$$Address);
7299 %}
7300 ins_pipe(ialu_reg_mem); // XXX
7301 %}
7302
7303 // Load Compressed Pointer
7304 instruct loadN(rRegN dst, memory mem)
7305 %{
7306 predicate(n->as_Load()->barrier_data() == 0);
7307 match(Set dst (LoadN mem));
7308
7309 ins_cost(125); // XXX
7310 format %{ "movl $dst, $mem\t# compressed ptr" %}
7311 ins_encode %{
7312 __ movl($dst$$Register, $mem$$Address);
7313 %}
7314 ins_pipe(ialu_reg_mem); // XXX
7315 %}
7316
7317
7318 // Load Klass Pointer
7319 instruct loadKlass(rRegP dst, memory mem)
7320 %{
7321 match(Set dst (LoadKlass mem));
7322
7323 ins_cost(125); // XXX
7324 format %{ "movq $dst, $mem\t# class" %}
7325 ins_encode %{
7326 __ movq($dst$$Register, $mem$$Address);
7327 %}
7328 ins_pipe(ialu_reg_mem); // XXX
7329 %}
7330
7331 // Load narrow Klass Pointer
7332 instruct loadNKlass(rRegN dst, memory mem)
7333 %{
7334 predicate(!UseCompactObjectHeaders);
7335 match(Set dst (LoadNKlass mem));
7336
7337 ins_cost(125); // XXX
7338 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7339 ins_encode %{
7340 __ movl($dst$$Register, $mem$$Address);
7341 %}
7342 ins_pipe(ialu_reg_mem); // XXX
7343 %}
7344
7345 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7346 %{
7347 predicate(UseCompactObjectHeaders);
7348 match(Set dst (LoadNKlass mem));
7349 effect(KILL cr);
7350 ins_cost(125);
7351 format %{
7352 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7353 "shrl $dst, markWord::klass_shift_at_offset"
7354 %}
7355 ins_encode %{
7356 if (UseAPX) {
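// APX form: load the compressed klass word and shift it right in a single
// instruction.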
7357 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
} else {
7360 __ movl($dst$$Register, $mem$$Address);
7361 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7362 }
7363 %}
7364 ins_pipe(ialu_reg_mem);
7365 %}
7366
7367 // Load Float
7368 instruct loadF(regF dst, memory mem)
7369 %{
7370 match(Set dst (LoadF mem));
7371
7372 ins_cost(145); // XXX
7373 format %{ "movss $dst, $mem\t# float" %}
7374 ins_encode %{
7375 __ movflt($dst$$XMMRegister, $mem$$Address);
7376 %}
7377 ins_pipe(pipe_slow); // XXX
7378 %}
7379
7380 // Load Double
7381 instruct loadD_partial(regD dst, memory mem)
7382 %{
7383 predicate(!UseXmmLoadAndClearUpper);
7384 match(Set dst (LoadD mem));
7385
7386 ins_cost(145); // XXX
7387 format %{ "movlpd $dst, $mem\t# double" %}
7388 ins_encode %{
7389 __ movdbl($dst$$XMMRegister, $mem$$Address);
7390 %}
7391 ins_pipe(pipe_slow); // XXX
7392 %}
7393
7394 instruct loadD(regD dst, memory mem)
7395 %{
7396 predicate(UseXmmLoadAndClearUpper);
7397 match(Set dst (LoadD mem));
7398
7399 ins_cost(145); // XXX
7400 format %{ "movsd $dst, $mem\t# double" %}
7401 ins_encode %{
7402 __ movdbl($dst$$XMMRegister, $mem$$Address);
7403 %}
7404 ins_pipe(pipe_slow); // XXX
7405 %}
7406
7407 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7408 %{
7409 match(Set dst con);
7410
7411 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7412
7413 ins_encode %{
7414 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7415 %}
7416
7417 ins_pipe(ialu_reg_fat);
7418 %}
7419
7420 // max = java.lang.Math.max(float a, float b)
7421 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7422 predicate(VM_Version::supports_avx10_2());
7423 match(Set dst (MaxF a b));
7424 format %{ "maxF $dst, $a, $b" %}
7425 ins_encode %{
7426 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7427 %}
7428 ins_pipe( pipe_slow );
7429 %}
7430
7431 // max = java.lang.Math.max(float a, float b)
7432 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7433 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7434 match(Set dst (MaxF a b));
7435 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7436 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7437 ins_encode %{
7438 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7439 %}
7440 ins_pipe( pipe_slow );
7441 %}
7442
7443 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7444 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7445 match(Set dst (MaxF a b));
7446 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7447
format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7449 ins_encode %{
7450 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7451 false /*min*/, true /*single*/);
7452 %}
7453 ins_pipe( pipe_slow );
7454 %}
7455
7456 // max = java.lang.Math.max(double a, double b)
7457 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7458 predicate(VM_Version::supports_avx10_2());
7459 match(Set dst (MaxD a b));
7460 format %{ "maxD $dst, $a, $b" %}
7461 ins_encode %{
7462 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7463 %}
7464 ins_pipe( pipe_slow );
7465 %}
7466
7467 // max = java.lang.Math.max(double a, double b)
7468 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7469 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7470 match(Set dst (MaxD a b));
7471 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7472 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7473 ins_encode %{
7474 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7475 %}
7476 ins_pipe( pipe_slow );
7477 %}
7478
7479 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7480 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7481 match(Set dst (MaxD a b));
7482 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7483
7484 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7485 ins_encode %{
7486 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7487 false /*min*/, false /*single*/);
7488 %}
7489 ins_pipe( pipe_slow );
7490 %}
7491
// min = java.lang.Math.min(float a, float b)
7493 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7494 predicate(VM_Version::supports_avx10_2());
7495 match(Set dst (MinF a b));
7496 format %{ "minF $dst, $a, $b" %}
7497 ins_encode %{
7498 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7499 %}
7500 ins_pipe( pipe_slow );
7501 %}
7502
7503 // min = java.lang.Math.min(float a, float b)
7504 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7505 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7506 match(Set dst (MinF a b));
7507 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7508 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7509 ins_encode %{
7510 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7511 %}
7512 ins_pipe( pipe_slow );
7513 %}
7514
7515 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7516 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7517 match(Set dst (MinF a b));
7518 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7519
7520 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7521 ins_encode %{
7522 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7523 true /*min*/, true /*single*/);
7524 %}
7525 ins_pipe( pipe_slow );
7526 %}
7527
// min = java.lang.Math.min(double a, double b)
7529 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7530 predicate(VM_Version::supports_avx10_2());
7531 match(Set dst (MinD a b));
7532 format %{ "minD $dst, $a, $b" %}
7533 ins_encode %{
7534 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7535 %}
7536 ins_pipe( pipe_slow );
7537 %}
7538
7539 // min = java.lang.Math.min(double a, double b)
7540 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7541 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7542 match(Set dst (MinD a b));
7543 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7544 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7545 ins_encode %{
7546 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7547 %}
7548 ins_pipe( pipe_slow );
7549 %}
7550
7551 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7552 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7553 match(Set dst (MinD a b));
7554 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7555
format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7557 ins_encode %{
7558 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7559 true /*min*/, false /*single*/);
7560 %}
7561 ins_pipe( pipe_slow );
7562 %}
7563
7564 // Load Effective Address
7565 instruct leaP8(rRegP dst, indOffset8 mem)
7566 %{
7567 match(Set dst mem);
7568
7569 ins_cost(110); // XXX
7570 format %{ "leaq $dst, $mem\t# ptr 8" %}
7571 ins_encode %{
7572 __ leaq($dst$$Register, $mem$$Address);
7573 %}
7574 ins_pipe(ialu_reg_reg_fat);
7575 %}
7576
7577 instruct leaP32(rRegP dst, indOffset32 mem)
7578 %{
7579 match(Set dst mem);
7580
7581 ins_cost(110);
7582 format %{ "leaq $dst, $mem\t# ptr 32" %}
7583 ins_encode %{
7584 __ leaq($dst$$Register, $mem$$Address);
7585 %}
7586 ins_pipe(ialu_reg_reg_fat);
7587 %}
7588
7589 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7590 %{
7591 match(Set dst mem);
7592
7593 ins_cost(110);
7594 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7595 ins_encode %{
7596 __ leaq($dst$$Register, $mem$$Address);
7597 %}
7598 ins_pipe(ialu_reg_reg_fat);
7599 %}
7600
7601 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7602 %{
7603 match(Set dst mem);
7604
7605 ins_cost(110);
7606 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7607 ins_encode %{
7608 __ leaq($dst$$Register, $mem$$Address);
7609 %}
7610 ins_pipe(ialu_reg_reg_fat);
7611 %}
7612
7613 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7614 %{
7615 match(Set dst mem);
7616
7617 ins_cost(110);
7618 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7619 ins_encode %{
7620 __ leaq($dst$$Register, $mem$$Address);
7621 %}
7622 ins_pipe(ialu_reg_reg_fat);
7623 %}
7624
7625 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7626 %{
7627 match(Set dst mem);
7628
7629 ins_cost(110);
7630 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7631 ins_encode %{
7632 __ leaq($dst$$Register, $mem$$Address);
7633 %}
7634 ins_pipe(ialu_reg_reg_fat);
7635 %}
7636
7637 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7638 %{
7639 match(Set dst mem);
7640
7641 ins_cost(110);
7642 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7643 ins_encode %{
7644 __ leaq($dst$$Register, $mem$$Address);
7645 %}
7646 ins_pipe(ialu_reg_reg_fat);
7647 %}
7648
7649 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7650 %{
7651 match(Set dst mem);
7652
7653 ins_cost(110);
7654 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7655 ins_encode %{
7656 __ leaq($dst$$Register, $mem$$Address);
7657 %}
7658 ins_pipe(ialu_reg_reg_fat);
7659 %}
7660
7661 // Load Effective Address which uses Narrow (32-bits) oop
7662 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7663 %{
7664 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7665 match(Set dst mem);
7666
7667 ins_cost(110);
7668 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7669 ins_encode %{
7670 __ leaq($dst$$Register, $mem$$Address);
7671 %}
7672 ins_pipe(ialu_reg_reg_fat);
7673 %}
7674
7675 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7676 %{
7677 predicate(CompressedOops::shift() == 0);
7678 match(Set dst mem);
7679
7680 ins_cost(110); // XXX
7681 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7682 ins_encode %{
7683 __ leaq($dst$$Register, $mem$$Address);
7684 %}
7685 ins_pipe(ialu_reg_reg_fat);
7686 %}
7687
7688 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7689 %{
7690 predicate(CompressedOops::shift() == 0);
7691 match(Set dst mem);
7692
7693 ins_cost(110);
7694 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7695 ins_encode %{
7696 __ leaq($dst$$Register, $mem$$Address);
7697 %}
7698 ins_pipe(ialu_reg_reg_fat);
7699 %}
7700
7701 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7702 %{
7703 predicate(CompressedOops::shift() == 0);
7704 match(Set dst mem);
7705
7706 ins_cost(110);
7707 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7708 ins_encode %{
7709 __ leaq($dst$$Register, $mem$$Address);
7710 %}
7711 ins_pipe(ialu_reg_reg_fat);
7712 %}
7713
7714 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7715 %{
7716 predicate(CompressedOops::shift() == 0);
7717 match(Set dst mem);
7718
7719 ins_cost(110);
7720 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7721 ins_encode %{
7722 __ leaq($dst$$Register, $mem$$Address);
7723 %}
7724 ins_pipe(ialu_reg_reg_fat);
7725 %}
7726
7727 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7728 %{
7729 predicate(CompressedOops::shift() == 0);
7730 match(Set dst mem);
7731
7732 ins_cost(110);
7733 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7734 ins_encode %{
7735 __ leaq($dst$$Register, $mem$$Address);
7736 %}
7737 ins_pipe(ialu_reg_reg_fat);
7738 %}
7739
7740 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7741 %{
7742 predicate(CompressedOops::shift() == 0);
7743 match(Set dst mem);
7744
7745 ins_cost(110);
7746 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7747 ins_encode %{
7748 __ leaq($dst$$Register, $mem$$Address);
7749 %}
7750 ins_pipe(ialu_reg_reg_fat);
7751 %}
7752
7753 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7754 %{
7755 predicate(CompressedOops::shift() == 0);
7756 match(Set dst mem);
7757
7758 ins_cost(110);
7759 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7760 ins_encode %{
7761 __ leaq($dst$$Register, $mem$$Address);
7762 %}
7763 ins_pipe(ialu_reg_reg_fat);
7764 %}
7765
7766 instruct loadConI(rRegI dst, immI src)
7767 %{
7768 match(Set dst src);
7769
7770 format %{ "movl $dst, $src\t# int" %}
7771 ins_encode %{
7772 __ movl($dst$$Register, $src$$constant);
7773 %}
7774 ins_pipe(ialu_reg_fat); // XXX
7775 %}
7776
7777 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7778 %{
7779 match(Set dst src);
7780 effect(KILL cr);
7781
7782 ins_cost(50);
7783 format %{ "xorl $dst, $dst\t# int" %}
7784 ins_encode %{
7785 __ xorl($dst$$Register, $dst$$Register);
7786 %}
7787 ins_pipe(ialu_reg);
7788 %}
7789
7790 instruct loadConL(rRegL dst, immL src)
7791 %{
7792 match(Set dst src);
7793
7794 ins_cost(150);
7795 format %{ "movq $dst, $src\t# long" %}
7796 ins_encode %{
7797 __ mov64($dst$$Register, $src$$constant);
7798 %}
7799 ins_pipe(ialu_reg);
7800 %}
7801
7802 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7803 %{
7804 match(Set dst src);
7805 effect(KILL cr);
7806
7807 ins_cost(50);
7808 format %{ "xorl $dst, $dst\t# long" %}
7809 ins_encode %{
7810 __ xorl($dst$$Register, $dst$$Register);
7811 %}
7812 ins_pipe(ialu_reg); // XXX
7813 %}
7814
7815 instruct loadConUL32(rRegL dst, immUL32 src)
7816 %{
7817 match(Set dst src);
7818
7819 ins_cost(60);
7820 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7821 ins_encode %{
7822 __ movl($dst$$Register, $src$$constant);
7823 %}
7824 ins_pipe(ialu_reg);
7825 %}
7826
7827 instruct loadConL32(rRegL dst, immL32 src)
7828 %{
7829 match(Set dst src);
7830
7831 ins_cost(70);
7832 format %{ "movq $dst, $src\t# long (32-bit)" %}
7833 ins_encode %{
7834 __ movq($dst$$Register, $src$$constant);
7835 %}
7836 ins_pipe(ialu_reg);
7837 %}
7838
7839 instruct loadConP(rRegP dst, immP con) %{
7840 match(Set dst con);
7841
7842 format %{ "movq $dst, $con\t# ptr" %}
7843 ins_encode %{
7844 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7845 %}
7846 ins_pipe(ialu_reg_fat); // XXX
7847 %}
7848
7849 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7850 %{
7851 match(Set dst src);
7852 effect(KILL cr);
7853
7854 ins_cost(50);
7855 format %{ "xorl $dst, $dst\t# ptr" %}
7856 ins_encode %{
7857 __ xorl($dst$$Register, $dst$$Register);
7858 %}
7859 ins_pipe(ialu_reg);
7860 %}
7861
7862 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7863 %{
7864 match(Set dst src);
7865 effect(KILL cr);
7866
7867 ins_cost(60);
7868 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7869 ins_encode %{
7870 __ movl($dst$$Register, $src$$constant);
7871 %}
7872 ins_pipe(ialu_reg);
7873 %}
7874
7875 instruct loadConF(regF dst, immF con) %{
7876 match(Set dst con);
7877 ins_cost(125);
7878 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7879 ins_encode %{
7880 __ movflt($dst$$XMMRegister, $constantaddress($con));
7881 %}
7882 ins_pipe(pipe_slow);
7883 %}
7884
7885 instruct loadConH(regF dst, immH con) %{
7886 match(Set dst con);
7887 ins_cost(125);
7888 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7889 ins_encode %{
7890 __ movflt($dst$$XMMRegister, $constantaddress($con));
7891 %}
7892 ins_pipe(pipe_slow);
7893 %}
7894
7895 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7896 match(Set dst src);
7897 effect(KILL cr);
format %{ "xorl $dst, $dst\t# compressed null pointer" %}
7899 ins_encode %{
7900 __ xorq($dst$$Register, $dst$$Register);
7901 %}
7902 ins_pipe(ialu_reg);
7903 %}
7904
7905 instruct loadConN(rRegN dst, immN src) %{
7906 match(Set dst src);
7907
7908 ins_cost(125);
7909 format %{ "movl $dst, $src\t# compressed ptr" %}
7910 ins_encode %{
7911 address con = (address)$src$$constant;
7912 if (con == nullptr) {
7913 ShouldNotReachHere();
7914 } else {
7915 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7916 }
7917 %}
7918 ins_pipe(ialu_reg_fat); // XXX
7919 %}
7920
7921 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7922 match(Set dst src);
7923
7924 ins_cost(125);
7925 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7926 ins_encode %{
7927 address con = (address)$src$$constant;
7928 if (con == nullptr) {
7929 ShouldNotReachHere();
7930 } else {
7931 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7932 }
7933 %}
7934 ins_pipe(ialu_reg_fat); // XXX
7935 %}
7936
7937 instruct loadConF0(regF dst, immF0 src)
7938 %{
7939 match(Set dst src);
7940 ins_cost(100);
7941
7942 format %{ "xorps $dst, $dst\t# float 0.0" %}
7943 ins_encode %{
7944 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7945 %}
7946 ins_pipe(pipe_slow);
7947 %}
7948
7949 // Use the same format since predicate() can not be used here.
7950 instruct loadConD(regD dst, immD con) %{
7951 match(Set dst con);
7952 ins_cost(125);
7953 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7954 ins_encode %{
7955 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7956 %}
7957 ins_pipe(pipe_slow);
7958 %}
7959
7960 instruct loadConD0(regD dst, immD0 src)
7961 %{
7962 match(Set dst src);
7963 ins_cost(100);
7964
7965 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7966 ins_encode %{
7967 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7968 %}
7969 ins_pipe(pipe_slow);
7970 %}
7971
7972 instruct loadSSI(rRegI dst, stackSlotI src)
7973 %{
7974 match(Set dst src);
7975
7976 ins_cost(125);
7977 format %{ "movl $dst, $src\t# int stk" %}
7978 ins_encode %{
7979 __ movl($dst$$Register, $src$$Address);
7980 %}
7981 ins_pipe(ialu_reg_mem);
7982 %}
7983
7984 instruct loadSSL(rRegL dst, stackSlotL src)
7985 %{
7986 match(Set dst src);
7987
7988 ins_cost(125);
7989 format %{ "movq $dst, $src\t# long stk" %}
7990 ins_encode %{
7991 __ movq($dst$$Register, $src$$Address);
7992 %}
7993 ins_pipe(ialu_reg_mem);
7994 %}
7995
7996 instruct loadSSP(rRegP dst, stackSlotP src)
7997 %{
7998 match(Set dst src);
7999
8000 ins_cost(125);
8001 format %{ "movq $dst, $src\t# ptr stk" %}
8002 ins_encode %{
8003 __ movq($dst$$Register, $src$$Address);
8004 %}
8005 ins_pipe(ialu_reg_mem);
8006 %}
8007
8008 instruct loadSSF(regF dst, stackSlotF src)
8009 %{
8010 match(Set dst src);
8011
8012 ins_cost(125);
8013 format %{ "movss $dst, $src\t# float stk" %}
8014 ins_encode %{
8015 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
8016 %}
8017 ins_pipe(pipe_slow); // XXX
8018 %}
8019
8020 // Use the same format since predicate() can not be used here.
8021 instruct loadSSD(regD dst, stackSlotD src)
8022 %{
8023 match(Set dst src);
8024
8025 ins_cost(125);
8026 format %{ "movsd $dst, $src\t# double stk" %}
8027 ins_encode %{
8028 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
8029 %}
8030 ins_pipe(pipe_slow); // XXX
8031 %}
8032
8033 // Prefetch instructions for allocation.
8034 // Must be safe to execute with invalid address (cannot fault).
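// The variant emitted is selected by AllocatePrefetchInstr: 0 -> prefetchnta,
// 1 -> prefetcht0, 2 -> prefetcht2, 3 -> prefetchw (see the predicates on the
// four instructs below).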
8035
8036 instruct prefetchAlloc( memory mem ) %{
8037 predicate(AllocatePrefetchInstr==3);
8038 match(PrefetchAllocation mem);
8039 ins_cost(125);
8040
8041 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
8042 ins_encode %{
8043 __ prefetchw($mem$$Address);
8044 %}
8045 ins_pipe(ialu_mem);
8046 %}
8047
8048 instruct prefetchAllocNTA( memory mem ) %{
8049 predicate(AllocatePrefetchInstr==0);
8050 match(PrefetchAllocation mem);
8051 ins_cost(125);
8052
8053 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
8054 ins_encode %{
8055 __ prefetchnta($mem$$Address);
8056 %}
8057 ins_pipe(ialu_mem);
8058 %}
8059
8060 instruct prefetchAllocT0( memory mem ) %{
8061 predicate(AllocatePrefetchInstr==1);
8062 match(PrefetchAllocation mem);
8063 ins_cost(125);
8064
8065 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8066 ins_encode %{
8067 __ prefetcht0($mem$$Address);
8068 %}
8069 ins_pipe(ialu_mem);
8070 %}
8071
8072 instruct prefetchAllocT2( memory mem ) %{
8073 predicate(AllocatePrefetchInstr==2);
8074 match(PrefetchAllocation mem);
8075 ins_cost(125);
8076
8077 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8078 ins_encode %{
8079 __ prefetcht2($mem$$Address);
8080 %}
8081 ins_pipe(ialu_mem);
8082 %}
8083
8084 //----------Store Instructions-------------------------------------------------
8085
8086 // Store Byte
8087 instruct storeB(memory mem, rRegI src)
8088 %{
8089 match(Set mem (StoreB mem src));
8090
8091 ins_cost(125); // XXX
8092 format %{ "movb $mem, $src\t# byte" %}
8093 ins_encode %{
8094 __ movb($mem$$Address, $src$$Register);
8095 %}
8096 ins_pipe(ialu_mem_reg);
8097 %}
8098
8099 // Store Char/Short
8100 instruct storeC(memory mem, rRegI src)
8101 %{
8102 match(Set mem (StoreC mem src));
8103
8104 ins_cost(125); // XXX
8105 format %{ "movw $mem, $src\t# char/short" %}
8106 ins_encode %{
8107 __ movw($mem$$Address, $src$$Register);
8108 %}
8109 ins_pipe(ialu_mem_reg);
8110 %}
8111
8112 // Store Integer
8113 instruct storeI(memory mem, rRegI src)
8114 %{
8115 match(Set mem (StoreI mem src));
8116
8117 ins_cost(125); // XXX
8118 format %{ "movl $mem, $src\t# int" %}
8119 ins_encode %{
8120 __ movl($mem$$Address, $src$$Register);
8121 %}
8122 ins_pipe(ialu_mem_reg);
8123 %}
8124
8125 // Store Long
8126 instruct storeL(memory mem, rRegL src)
8127 %{
8128 match(Set mem (StoreL mem src));
8129
8130 ins_cost(125); // XXX
8131 format %{ "movq $mem, $src\t# long" %}
8132 ins_encode %{
8133 __ movq($mem$$Address, $src$$Register);
8134 %}
8135 ins_pipe(ialu_mem_reg); // XXX
8136 %}
8137
8138 // Store Pointer
8139 instruct storeP(memory mem, any_RegP src)
8140 %{
8141 predicate(n->as_Store()->barrier_data() == 0);
8142 match(Set mem (StoreP mem src));
8143
8144 ins_cost(125); // XXX
8145 format %{ "movq $mem, $src\t# ptr" %}
8146 ins_encode %{
8147 __ movq($mem$$Address, $src$$Register);
8148 %}
8149 ins_pipe(ialu_mem_reg);
8150 %}
8151
8152 instruct storeImmP0(memory mem, immP0 zero)
8153 %{
8154 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8155 match(Set mem (StoreP mem zero));
8156
8157 ins_cost(125); // XXX
8158 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8159 ins_encode %{
8160 __ movq($mem$$Address, r12);
8161 %}
8162 ins_pipe(ialu_mem_reg);
8163 %}
8164
8165 // Store Null Pointer, mark word, or other simple pointer constant.
8166 instruct storeImmP(memory mem, immP31 src)
8167 %{
8168 predicate(n->as_Store()->barrier_data() == 0);
8169 match(Set mem (StoreP mem src));
8170
8171 ins_cost(150); // XXX
8172 format %{ "movq $mem, $src\t# ptr" %}
8173 ins_encode %{
8174 __ movq($mem$$Address, $src$$constant);
8175 %}
8176 ins_pipe(ialu_mem_imm);
8177 %}
8178
8179 // Store Compressed Pointer
8180 instruct storeN(memory mem, rRegN src)
8181 %{
8182 predicate(n->as_Store()->barrier_data() == 0);
8183 match(Set mem (StoreN mem src));
8184
8185 ins_cost(125); // XXX
8186 format %{ "movl $mem, $src\t# compressed ptr" %}
8187 ins_encode %{
8188 __ movl($mem$$Address, $src$$Register);
8189 %}
8190 ins_pipe(ialu_mem_reg);
8191 %}
8192
8193 instruct storeNKlass(memory mem, rRegN src)
8194 %{
8195 match(Set mem (StoreNKlass mem src));
8196
8197 ins_cost(125); // XXX
8198 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8199 ins_encode %{
8200 __ movl($mem$$Address, $src$$Register);
8201 %}
8202 ins_pipe(ialu_mem_reg);
8203 %}
8204
8205 instruct storeImmN0(memory mem, immN0 zero)
8206 %{
8207 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8208 match(Set mem (StoreN mem zero));
8209
8210 ins_cost(125); // XXX
8211 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8212 ins_encode %{
8213 __ movl($mem$$Address, r12);
8214 %}
8215 ins_pipe(ialu_mem_reg);
8216 %}
8217
8218 instruct storeImmN(memory mem, immN src)
8219 %{
8220 predicate(n->as_Store()->barrier_data() == 0);
8221 match(Set mem (StoreN mem src));
8222
8223 ins_cost(150); // XXX
8224 format %{ "movl $mem, $src\t# compressed ptr" %}
8225 ins_encode %{
8226 address con = (address)$src$$constant;
8227 if (con == nullptr) {
8228 __ movl($mem$$Address, 0);
8229 } else {
8230 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8231 }
8232 %}
8233 ins_pipe(ialu_mem_imm);
8234 %}
8235
8236 instruct storeImmNKlass(memory mem, immNKlass src)
8237 %{
8238 match(Set mem (StoreNKlass mem src));
8239
8240 ins_cost(150); // XXX
8241 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8242 ins_encode %{
8243 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8244 %}
8245 ins_pipe(ialu_mem_imm);
8246 %}
8247
8248 // Store Integer Immediate
8249 instruct storeImmI0(memory mem, immI_0 zero)
8250 %{
8251 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8252 match(Set mem (StoreI mem zero));
8253
8254 ins_cost(125); // XXX
8255 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8256 ins_encode %{
8257 __ movl($mem$$Address, r12);
8258 %}
8259 ins_pipe(ialu_mem_reg);
8260 %}
8261
8262 instruct storeImmI(memory mem, immI src)
8263 %{
8264 match(Set mem (StoreI mem src));
8265
8266 ins_cost(150);
8267 format %{ "movl $mem, $src\t# int" %}
8268 ins_encode %{
8269 __ movl($mem$$Address, $src$$constant);
8270 %}
8271 ins_pipe(ialu_mem_imm);
8272 %}
8273
8274 // Store Long Immediate
8275 instruct storeImmL0(memory mem, immL0 zero)
8276 %{
8277 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8278 match(Set mem (StoreL mem zero));
8279
8280 ins_cost(125); // XXX
8281 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8282 ins_encode %{
8283 __ movq($mem$$Address, r12);
8284 %}
8285 ins_pipe(ialu_mem_reg);
8286 %}
8287
8288 instruct storeImmL(memory mem, immL32 src)
8289 %{
8290 match(Set mem (StoreL mem src));
8291
8292 ins_cost(150);
8293 format %{ "movq $mem, $src\t# long" %}
8294 ins_encode %{
8295 __ movq($mem$$Address, $src$$constant);
8296 %}
8297 ins_pipe(ialu_mem_imm);
8298 %}
8299
8300 // Store Short/Char Immediate
8301 instruct storeImmC0(memory mem, immI_0 zero)
8302 %{
8303 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8304 match(Set mem (StoreC mem zero));
8305
8306 ins_cost(125); // XXX
8307 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8308 ins_encode %{
8309 __ movw($mem$$Address, r12);
8310 %}
8311 ins_pipe(ialu_mem_reg);
8312 %}
8313
8314 instruct storeImmI16(memory mem, immI16 src)
8315 %{
8316 predicate(UseStoreImmI16);
8317 match(Set mem (StoreC mem src));
8318
8319 ins_cost(150);
8320 format %{ "movw $mem, $src\t# short/char" %}
8321 ins_encode %{
8322 __ movw($mem$$Address, $src$$constant);
8323 %}
8324 ins_pipe(ialu_mem_imm);
8325 %}
8326
8327 // Store Byte Immediate
8328 instruct storeImmB0(memory mem, immI_0 zero)
8329 %{
8330 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8331 match(Set mem (StoreB mem zero));
8332
8333 ins_cost(125); // XXX
format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8335 ins_encode %{
8336 __ movb($mem$$Address, r12);
8337 %}
8338 ins_pipe(ialu_mem_reg);
8339 %}
8340
8341 instruct storeImmB(memory mem, immI8 src)
8342 %{
8343 match(Set mem (StoreB mem src));
8344
8345 ins_cost(150); // XXX
8346 format %{ "movb $mem, $src\t# byte" %}
8347 ins_encode %{
8348 __ movb($mem$$Address, $src$$constant);
8349 %}
8350 ins_pipe(ialu_mem_imm);
8351 %}
8352
8353 // Store Float
8354 instruct storeF(memory mem, regF src)
8355 %{
8356 match(Set mem (StoreF mem src));
8357
8358 ins_cost(95); // XXX
8359 format %{ "movss $mem, $src\t# float" %}
8360 ins_encode %{
8361 __ movflt($mem$$Address, $src$$XMMRegister);
8362 %}
8363 ins_pipe(pipe_slow); // XXX
8364 %}
8365
8366 // Store immediate Float value (it is faster than store from XMM register)
8367 instruct storeF0(memory mem, immF0 zero)
8368 %{
8369 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8370 match(Set mem (StoreF mem zero));
8371
8372 ins_cost(25); // XXX
8373 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8374 ins_encode %{
8375 __ movl($mem$$Address, r12);
8376 %}
8377 ins_pipe(ialu_mem_reg);
8378 %}
8379
8380 instruct storeF_imm(memory mem, immF src)
8381 %{
8382 match(Set mem (StoreF mem src));
8383
8384 ins_cost(50);
8385 format %{ "movl $mem, $src\t# float" %}
8386 ins_encode %{
8387 __ movl($mem$$Address, jint_cast($src$$constant));
8388 %}
8389 ins_pipe(ialu_mem_imm);
8390 %}
8391
8392 // Store Double
8393 instruct storeD(memory mem, regD src)
8394 %{
8395 match(Set mem (StoreD mem src));
8396
8397 ins_cost(95); // XXX
8398 format %{ "movsd $mem, $src\t# double" %}
8399 ins_encode %{
8400 __ movdbl($mem$$Address, $src$$XMMRegister);
8401 %}
8402 ins_pipe(pipe_slow); // XXX
8403 %}
8404
8405 // Store immediate double 0.0 (it is faster than store from XMM register)
8406 instruct storeD0_imm(memory mem, immD0 src)
8407 %{
8408 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8409 match(Set mem (StoreD mem src));
8410
8411 ins_cost(50);
8412 format %{ "movq $mem, $src\t# double 0." %}
8413 ins_encode %{
8414 __ movq($mem$$Address, $src$$constant);
8415 %}
8416 ins_pipe(ialu_mem_imm);
8417 %}
8418
8419 instruct storeD0(memory mem, immD0 zero)
8420 %{
8421 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8422 match(Set mem (StoreD mem zero));
8423
8424 ins_cost(25); // XXX
8425 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8426 ins_encode %{
8427 __ movq($mem$$Address, r12);
8428 %}
8429 ins_pipe(ialu_mem_reg);
8430 %}
8431
8432 instruct storeSSI(stackSlotI dst, rRegI src)
8433 %{
8434 match(Set dst src);
8435
8436 ins_cost(100);
8437 format %{ "movl $dst, $src\t# int stk" %}
8438 ins_encode %{
8439 __ movl($dst$$Address, $src$$Register);
8440 %}
8441 ins_pipe( ialu_mem_reg );
8442 %}
8443
8444 instruct storeSSL(stackSlotL dst, rRegL src)
8445 %{
8446 match(Set dst src);
8447
8448 ins_cost(100);
8449 format %{ "movq $dst, $src\t# long stk" %}
8450 ins_encode %{
8451 __ movq($dst$$Address, $src$$Register);
8452 %}
8453 ins_pipe(ialu_mem_reg);
8454 %}
8455
8456 instruct storeSSP(stackSlotP dst, rRegP src)
8457 %{
8458 match(Set dst src);
8459
8460 ins_cost(100);
8461 format %{ "movq $dst, $src\t# ptr stk" %}
8462 ins_encode %{
8463 __ movq($dst$$Address, $src$$Register);
8464 %}
8465 ins_pipe(ialu_mem_reg);
8466 %}
8467
8468 instruct storeSSF(stackSlotF dst, regF src)
8469 %{
8470 match(Set dst src);
8471
8472 ins_cost(95); // XXX
8473 format %{ "movss $dst, $src\t# float stk" %}
8474 ins_encode %{
8475 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8476 %}
8477 ins_pipe(pipe_slow); // XXX
8478 %}
8479
8480 instruct storeSSD(stackSlotD dst, regD src)
8481 %{
8482 match(Set dst src);
8483
8484 ins_cost(95); // XXX
8485 format %{ "movsd $dst, $src\t# double stk" %}
8486 ins_encode %{
8487 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8488 %}
8489 ins_pipe(pipe_slow); // XXX
8490 %}
8491
8492 instruct cacheWB(indirect addr)
8493 %{
8494 predicate(VM_Version::supports_data_cache_line_flush());
8495 match(CacheWB addr);
8496
8497 ins_cost(100);
8498 format %{"cache wb $addr" %}
8499 ins_encode %{
8500 assert($addr->index_position() < 0, "should be");
8501 assert($addr$$disp == 0, "should be");
8502 __ cache_wb(Address($addr$$base$$Register, 0));
8503 %}
8504 ins_pipe(pipe_slow); // XXX
8505 %}
8506
8507 instruct cacheWBPreSync()
8508 %{
8509 predicate(VM_Version::supports_data_cache_line_flush());
8510 match(CacheWBPreSync);
8511
8512 ins_cost(100);
8513 format %{"cache wb presync" %}
8514 ins_encode %{
8515 __ cache_wbsync(true);
8516 %}
8517 ins_pipe(pipe_slow); // XXX
8518 %}
8519
8520 instruct cacheWBPostSync()
8521 %{
8522 predicate(VM_Version::supports_data_cache_line_flush());
8523 match(CacheWBPostSync);
8524
8525 ins_cost(100);
8526 format %{"cache wb postsync" %}
8527 ins_encode %{
8528 __ cache_wbsync(false);
8529 %}
8530 ins_pipe(pipe_slow); // XXX
8531 %}
8532
8533 //----------BSWAP Instructions-------------------------------------------------
8534 instruct bytes_reverse_int(rRegI dst) %{
8535 match(Set dst (ReverseBytesI dst));
8536
8537 format %{ "bswapl $dst" %}
8538 ins_encode %{
8539 __ bswapl($dst$$Register);
8540 %}
8541 ins_pipe( ialu_reg );
8542 %}
8543
8544 instruct bytes_reverse_long(rRegL dst) %{
8545 match(Set dst (ReverseBytesL dst));
8546
8547 format %{ "bswapq $dst" %}
8548 ins_encode %{
8549 __ bswapq($dst$$Register);
8550 %}
8551 ins_pipe( ialu_reg);
8552 %}
8553
8554 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8555 match(Set dst (ReverseBytesUS dst));
8556 effect(KILL cr);
8557
8558 format %{ "bswapl $dst\n\t"
8559 "shrl $dst,16\n\t" %}
8560 ins_encode %{
8561 __ bswapl($dst$$Register);
8562 __ shrl($dst$$Register, 16);
8563 %}
8564 ins_pipe( ialu_reg );
8565 %}
8566
8567 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8568 match(Set dst (ReverseBytesS dst));
8569 effect(KILL cr);
8570
8571 format %{ "bswapl $dst\n\t"
8572 "sar $dst,16\n\t" %}
8573 ins_encode %{
8574 __ bswapl($dst$$Register);
8575 __ sarl($dst$$Register, 16);
8576 %}
8577 ins_pipe( ialu_reg );
8578 %}
8579
8580 //---------- Zeros Count Instructions ------------------------------------------
8581
8582 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8583 predicate(UseCountLeadingZerosInstruction);
8584 match(Set dst (CountLeadingZerosI src));
8585 effect(KILL cr);
8586
8587 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8588 ins_encode %{
8589 __ lzcntl($dst$$Register, $src$$Register);
8590 %}
8591 ins_pipe(ialu_reg);
8592 %}
8593
8594 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8595 predicate(UseCountLeadingZerosInstruction);
8596 match(Set dst (CountLeadingZerosI (LoadI src)));
8597 effect(KILL cr);
8598 ins_cost(175);
8599 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8600 ins_encode %{
8601 __ lzcntl($dst$$Register, $src$$Address);
8602 %}
8603 ins_pipe(ialu_reg_mem);
8604 %}
8605
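// Fallback for CPUs without lzcnt: bsrl yields the index of the most significant
// set bit, so for a non-zero source clz = 31 - bsr(src). For a zero source bsrl
// leaves the destination undefined and sets ZF, hence the branch that substitutes
// -1 so the final negl/addl produce 31 - (-1) = 32 (BitsPerInt). Worked example:
// src = 0x00010000 -> bsr = 16 -> 31 - 16 = 15 leading zeros. The long variant
// below follows the same scheme with bsrq and 63.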
8606 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8607 predicate(!UseCountLeadingZerosInstruction);
8608 match(Set dst (CountLeadingZerosI src));
8609 effect(KILL cr);
8610
8611 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8612 "jnz skip\n\t"
8613 "movl $dst, -1\n"
8614 "skip:\n\t"
8615 "negl $dst\n\t"
8616 "addl $dst, 31" %}
8617 ins_encode %{
8618 Register Rdst = $dst$$Register;
8619 Register Rsrc = $src$$Register;
8620 Label skip;
8621 __ bsrl(Rdst, Rsrc);
8622 __ jccb(Assembler::notZero, skip);
8623 __ movl(Rdst, -1);
8624 __ bind(skip);
8625 __ negl(Rdst);
8626 __ addl(Rdst, BitsPerInt - 1);
8627 %}
8628 ins_pipe(ialu_reg);
8629 %}
8630
8631 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8632 predicate(UseCountLeadingZerosInstruction);
8633 match(Set dst (CountLeadingZerosL src));
8634 effect(KILL cr);
8635
8636 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8637 ins_encode %{
8638 __ lzcntq($dst$$Register, $src$$Register);
8639 %}
8640 ins_pipe(ialu_reg);
8641 %}
8642
8643 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8644 predicate(UseCountLeadingZerosInstruction);
8645 match(Set dst (CountLeadingZerosL (LoadL src)));
8646 effect(KILL cr);
8647 ins_cost(175);
8648 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8649 ins_encode %{
8650 __ lzcntq($dst$$Register, $src$$Address);
8651 %}
8652 ins_pipe(ialu_reg_mem);
8653 %}
8654
8655 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8656 predicate(!UseCountLeadingZerosInstruction);
8657 match(Set dst (CountLeadingZerosL src));
8658 effect(KILL cr);
8659
8660 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8661 "jnz skip\n\t"
8662 "movl $dst, -1\n"
8663 "skip:\n\t"
8664 "negl $dst\n\t"
8665 "addl $dst, 63" %}
8666 ins_encode %{
8667 Register Rdst = $dst$$Register;
8668 Register Rsrc = $src$$Register;
8669 Label skip;
8670 __ bsrq(Rdst, Rsrc);
8671 __ jccb(Assembler::notZero, skip);
8672 __ movl(Rdst, -1);
8673 __ bind(skip);
8674 __ negl(Rdst);
8675 __ addl(Rdst, BitsPerLong - 1);
8676 %}
8677 ins_pipe(ialu_reg);
8678 %}
8679
8680 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8681 predicate(UseCountTrailingZerosInstruction);
8682 match(Set dst (CountTrailingZerosI src));
8683 effect(KILL cr);
8684
8685 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8686 ins_encode %{
8687 __ tzcntl($dst$$Register, $src$$Register);
8688 %}
8689 ins_pipe(ialu_reg);
8690 %}
8691
8692 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8693 predicate(UseCountTrailingZerosInstruction);
8694 match(Set dst (CountTrailingZerosI (LoadI src)));
8695 effect(KILL cr);
8696 ins_cost(175);
8697 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8698 ins_encode %{
8699 __ tzcntl($dst$$Register, $src$$Address);
8700 %}
8701 ins_pipe(ialu_reg_mem);
8702 %}
8703
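// Fallback for CPUs without tzcnt: bsfl returns the index of the least significant
// set bit, which for a non-zero source already equals the trailing-zero count; a
// zero source leaves the destination undefined and sets ZF, so the branch below
// substitutes BitsPerInt (32). The long variant further down does the same with
// bsfq and BitsPerLong (64).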
8704 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8705 predicate(!UseCountTrailingZerosInstruction);
8706 match(Set dst (CountTrailingZerosI src));
8707 effect(KILL cr);
8708
8709 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8710 "jnz done\n\t"
8711 "movl $dst, 32\n"
8712 "done:" %}
8713 ins_encode %{
8714 Register Rdst = $dst$$Register;
8715 Label done;
8716 __ bsfl(Rdst, $src$$Register);
8717 __ jccb(Assembler::notZero, done);
8718 __ movl(Rdst, BitsPerInt);
8719 __ bind(done);
8720 %}
8721 ins_pipe(ialu_reg);
8722 %}
8723
8724 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8725 predicate(UseCountTrailingZerosInstruction);
8726 match(Set dst (CountTrailingZerosL src));
8727 effect(KILL cr);
8728
8729 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8730 ins_encode %{
8731 __ tzcntq($dst$$Register, $src$$Register);
8732 %}
8733 ins_pipe(ialu_reg);
8734 %}
8735
8736 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8737 predicate(UseCountTrailingZerosInstruction);
8738 match(Set dst (CountTrailingZerosL (LoadL src)));
8739 effect(KILL cr);
8740 ins_cost(175);
8741 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8742 ins_encode %{
8743 __ tzcntq($dst$$Register, $src$$Address);
8744 %}
8745 ins_pipe(ialu_reg_mem);
8746 %}
8747
8748 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8749 predicate(!UseCountTrailingZerosInstruction);
8750 match(Set dst (CountTrailingZerosL src));
8751 effect(KILL cr);
8752
8753 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8754 "jnz done\n\t"
8755 "movl $dst, 64\n"
8756 "done:" %}
8757 ins_encode %{
8758 Register Rdst = $dst$$Register;
8759 Label done;
8760 __ bsfq(Rdst, $src$$Register);
8761 __ jccb(Assembler::notZero, done);
8762 __ movl(Rdst, BitsPerLong);
8763 __ bind(done);
8764 %}
8765 ins_pipe(ialu_reg);
8766 %}
8767
8768 //--------------- Reverse Operation Instructions ----------------
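// ReverseI/ReverseL reverse the bit order of an int/long value. When GFNI is
// available the patterns pass two XMM temps so the MacroAssembler can reverse the
// bits within each byte with the Galois-field affine instruction before swapping
// the byte order; without GFNI they pass xnoreg and reverseI/reverseL fall back to
// a shift-and-mask sequence in the integer temps.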
8769 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8770 predicate(!VM_Version::supports_gfni());
8771 match(Set dst (ReverseI src));
8772 effect(TEMP dst, TEMP rtmp, KILL cr);
8773 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8774 ins_encode %{
8775 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8776 %}
8777 ins_pipe( ialu_reg );
8778 %}
8779
8780 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8781 predicate(VM_Version::supports_gfni());
8782 match(Set dst (ReverseI src));
8783 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8784 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8785 ins_encode %{
8786 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8787 %}
8788 ins_pipe( ialu_reg );
8789 %}
8790
8791 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8792 predicate(!VM_Version::supports_gfni());
8793 match(Set dst (ReverseL src));
8794 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8795 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8796 ins_encode %{
8797 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8798 %}
8799 ins_pipe( ialu_reg );
8800 %}
8801
8802 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8803 predicate(VM_Version::supports_gfni());
8804 match(Set dst (ReverseL src));
8805 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8806 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8807 ins_encode %{
8808 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8809 %}
8810 ins_pipe( ialu_reg );
8811 %}
8812
8813 //---------- Population Count Instructions -------------------------------------
8814
8815 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8816 predicate(UsePopCountInstruction);
8817 match(Set dst (PopCountI src));
8818 effect(KILL cr);
8819
8820 format %{ "popcnt $dst, $src" %}
8821 ins_encode %{
8822 __ popcntl($dst$$Register, $src$$Register);
8823 %}
8824 ins_pipe(ialu_reg);
8825 %}
8826
8827 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8828 predicate(UsePopCountInstruction);
8829 match(Set dst (PopCountI (LoadI mem)));
8830 effect(KILL cr);
8831
8832 format %{ "popcnt $dst, $mem" %}
8833 ins_encode %{
8834 __ popcntl($dst$$Register, $mem$$Address);
8835 %}
8836 ins_pipe(ialu_reg);
8837 %}
8838
8839 // Note: Long.bitCount(long) returns an int.
8840 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8841 predicate(UsePopCountInstruction);
8842 match(Set dst (PopCountL src));
8843 effect(KILL cr);
8844
8845 format %{ "popcnt $dst, $src" %}
8846 ins_encode %{
8847 __ popcntq($dst$$Register, $src$$Register);
8848 %}
8849 ins_pipe(ialu_reg);
8850 %}
8851
8852 // Note: Long.bitCount(long) returns an int.
8853 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8854 predicate(UsePopCountInstruction);
8855 match(Set dst (PopCountL (LoadL mem)));
8856 effect(KILL cr);
8857
8858 format %{ "popcnt $dst, $mem" %}
8859 ins_encode %{
8860 __ popcntq($dst$$Register, $mem$$Address);
8861 %}
8862 ins_pipe(ialu_reg);
8863 %}
8864
8865
8866 //----------MemBar Instructions-----------------------------------------------
8867 // Memory barrier flavors
8868
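// x86's TSO memory model only allows a later load to be reordered ahead of an
// earlier store, so acquire, release and storestore barriers need no code at all
// (the empty encodings below). Only the StoreLoad ordering required by
// MemBarVolatile needs an instruction, emitted as a locked add to the stack because
// that is typically cheaper than mfence.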
8869 instruct membar_acquire()
8870 %{
8871 match(MemBarAcquire);
8872 match(LoadFence);
8873 ins_cost(0);
8874
8875 size(0);
8876 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8877 ins_encode();
8878 ins_pipe(empty);
8879 %}
8880
8881 instruct membar_acquire_lock()
8882 %{
8883 match(MemBarAcquireLock);
8884 ins_cost(0);
8885
8886 size(0);
8887 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8888 ins_encode();
8889 ins_pipe(empty);
8890 %}
8891
8892 instruct membar_release()
8893 %{
8894 match(MemBarRelease);
8895 match(StoreFence);
8896 ins_cost(0);
8897
8898 size(0);
8899 format %{ "MEMBAR-release ! (empty encoding)" %}
8900 ins_encode();
8901 ins_pipe(empty);
8902 %}
8903
8904 instruct membar_release_lock()
8905 %{
8906 match(MemBarReleaseLock);
8907 ins_cost(0);
8908
8909 size(0);
8910 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8911 ins_encode();
8912 ins_pipe(empty);
8913 %}
8914
8915 instruct membar_volatile(rFlagsReg cr) %{
8916 match(MemBarVolatile);
8917 effect(KILL cr);
8918 ins_cost(400);
8919
8920 format %{
8921 $$template
8922 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8923 %}
8924 ins_encode %{
8925 __ membar(Assembler::StoreLoad);
8926 %}
8927 ins_pipe(pipe_slow);
8928 %}
8929
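// When Matcher::post_store_load_barrier() can show that the required StoreLoad
// ordering is already provided by a neighbouring instruction (for example a locked
// atomic), the MemBarVolatile matches this zero-size form instead of emitting
// another fence.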
8930 instruct unnecessary_membar_volatile()
8931 %{
8932 match(MemBarVolatile);
8933 predicate(Matcher::post_store_load_barrier(n));
8934 ins_cost(0);
8935
8936 size(0);
8937 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8938 ins_encode();
8939 ins_pipe(empty);
8940 %}
8941
8942 instruct membar_storestore() %{
8943 match(MemBarStoreStore);
8944 match(StoreStoreFence);
8945 ins_cost(0);
8946
8947 size(0);
8948 format %{ "MEMBAR-storestore (empty encoding)" %}
8949 ins_encode( );
8950 ins_pipe(empty);
8951 %}
8952
8953 //----------Move Instructions--------------------------------------------------
8954
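// The CastX2P/CastP2X/CastI2N casts below are pure re-typing moves between the
// integer and pointer views of the same value; when the allocator places dst and
// src in the same register they cost nothing, which is why every encoding guards
// the move with a dst != src check.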
8955 instruct castX2P(rRegP dst, rRegL src)
8956 %{
8957 match(Set dst (CastX2P src));
8958
8959 format %{ "movq $dst, $src\t# long->ptr" %}
8960 ins_encode %{
8961 if ($dst$$reg != $src$$reg) {
8962 __ movptr($dst$$Register, $src$$Register);
8963 }
8964 %}
8965 ins_pipe(ialu_reg_reg); // XXX
8966 %}
8967
8968 instruct castI2N(rRegN dst, rRegI src)
8969 %{
8970 match(Set dst (CastI2N src));
8971
8972 format %{ "movl $dst, $src\t# int -> narrow ptr" %}
8973 ins_encode %{
8974 if ($dst$$reg != $src$$reg) {
8975 __ movl($dst$$Register, $src$$Register);
8976 }
8977 %}
8978 ins_pipe(ialu_reg_reg); // XXX
8979 %}
8980
8981 instruct castN2X(rRegL dst, rRegN src)
8982 %{
8983 match(Set dst (CastP2X src));
8984
8985 format %{ "movq $dst, $src\t# narrow ptr -> long" %}
8986 ins_encode %{
8987 if ($dst$$reg != $src$$reg) {
8988 __ movptr($dst$$Register, $src$$Register);
8989 }
8990 %}
8991 ins_pipe(ialu_reg_reg); // XXX
8992 %}
8993
8994 instruct castP2X(rRegL dst, rRegP src)
8995 %{
8996 match(Set dst (CastP2X src));
8997
8998 format %{ "movq $dst, $src\t# ptr -> long" %}
8999 ins_encode %{
9000 if ($dst$$reg != $src$$reg) {
9001 __ movptr($dst$$Register, $src$$Register);
9002 }
9003 %}
9004 ins_pipe(ialu_reg_reg); // XXX
9005 %}
9006
9007 // Convert oop into int for vector alignment masking
9008 instruct convP2I(rRegI dst, rRegP src)
9009 %{
9010 match(Set dst (ConvL2I (CastP2X src)));
9011
9012 format %{ "movl $dst, $src\t# ptr -> int" %}
9013 ins_encode %{
9014 __ movl($dst$$Register, $src$$Register);
9015 %}
9016 ins_pipe(ialu_reg_reg); // XXX
9017 %}
9018
9019 // Convert compressed oop into int for vector alignment masking
9020 // in the case of 32-bit oops (heap < 4Gb), i.e. when CompressedOops::shift() == 0.
9021 instruct convN2I(rRegI dst, rRegN src)
9022 %{
9023 predicate(CompressedOops::shift() == 0);
9024 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9025
9026 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
9027 ins_encode %{
9028 __ movl($dst$$Register, $src$$Register);
9029 %}
9030 ins_pipe(ialu_reg_reg); // XXX
9031 %}
9032
9033 // Convert oop pointer into compressed form
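// With compressed oops, encoding computes (oop - heap_base) >> shift (or passes the
// value through unchanged when the base and shift are zero), and decoding is the
// inverse. The plain forms must preserve null, whereas the _not_null forms may skip
// the null check.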
9034 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
9035 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9036 match(Set dst (EncodeP src));
9037 effect(KILL cr);
9038 format %{ "encode_heap_oop $dst,$src" %}
9039 ins_encode %{
9040 Register s = $src$$Register;
9041 Register d = $dst$$Register;
9042 if (s != d) {
9043 __ movq(d, s);
9044 }
9045 __ encode_heap_oop(d);
9046 %}
9047 ins_pipe(ialu_reg_long);
9048 %}
9049
9050 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9051 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
9052 match(Set dst (EncodeP src));
9053 effect(KILL cr);
9054 format %{ "encode_heap_oop_not_null $dst,$src" %}
9055 ins_encode %{
9056 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9057 %}
9058 ins_pipe(ialu_reg_long);
9059 %}
9060
9061 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
9062 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9063 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9064 match(Set dst (DecodeN src));
9065 effect(KILL cr);
9066 format %{ "decode_heap_oop $dst,$src" %}
9067 ins_encode %{
9068 Register s = $src$$Register;
9069 Register d = $dst$$Register;
9070 if (s != d) {
9071 __ movq(d, s);
9072 }
9073 __ decode_heap_oop(d);
9074 %}
9075 ins_pipe(ialu_reg_long);
9076 %}
9077
9078 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9079 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9080 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9081 match(Set dst (DecodeN src));
9082 effect(KILL cr);
9083 format %{ "decode_heap_oop_not_null $dst,$src" %}
9084 ins_encode %{
9085 Register s = $src$$Register;
9086 Register d = $dst$$Register;
9087 if (s != d) {
9088 __ decode_heap_oop_not_null(d, s);
9089 } else {
9090 __ decode_heap_oop_not_null(d);
9091 }
9092 %}
9093 ins_pipe(ialu_reg_long);
9094 %}
9095
9096 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9097 match(Set dst (EncodePKlass src));
9098 effect(TEMP dst, KILL cr);
9099 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9100 ins_encode %{
9101 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9102 %}
9103 ins_pipe(ialu_reg_long);
9104 %}
9105
9106 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9107 match(Set dst (DecodeNKlass src));
9108 effect(TEMP dst, KILL cr);
9109 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9110 ins_encode %{
9111 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9112 %}
9113 ins_pipe(ialu_reg_long);
9114 %}
9115
9116 //----------Conditional Move---------------------------------------------------
9117 // Jump
9118 // dummy instruction for generating temp registers
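// The jumpXtnd forms implement tableswitch dispatch: leaq loads the jump table's
// start address from the constant section into the temp, then an indirect jmp
// through [table + switch_val (optionally scaled and displaced)] transfers control
// to the selected case.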
9119 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9120 match(Jump (LShiftL switch_val shift));
9121 ins_cost(350);
9122 predicate(false);
9123 effect(TEMP dest);
9124
9125 format %{ "leaq $dest, [$constantaddress]\n\t"
9126 "jmp [$dest + $switch_val << $shift]\n\t" %}
9127 ins_encode %{
9128 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9129 // to do that and the compiler is using that register as one it can allocate.
9130 // So we build it all by hand.
9131 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9132 // ArrayAddress dispatch(table, index);
9133 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9134 __ lea($dest$$Register, $constantaddress);
9135 __ jmp(dispatch);
9136 %}
9137 ins_pipe(pipe_jmp);
9138 %}
9139
9140 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9141 match(Jump (AddL (LShiftL switch_val shift) offset));
9142 ins_cost(350);
9143 effect(TEMP dest);
9144
9145 format %{ "leaq $dest, [$constantaddress]\n\t"
9146 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9147 ins_encode %{
9148 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9149 // to do that and the compiler is using that register as one it can allocate.
9150 // So we build it all by hand.
9151 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9152 // ArrayAddress dispatch(table, index);
9153 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9154 __ lea($dest$$Register, $constantaddress);
9155 __ jmp(dispatch);
9156 %}
9157 ins_pipe(pipe_jmp);
9158 %}
9159
9160 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9161 match(Jump switch_val);
9162 ins_cost(350);
9163 effect(TEMP dest);
9164
9165 format %{ "leaq $dest, [$constantaddress]\n\t"
9166 "jmp [$dest + $switch_val]\n\t" %}
9167 ins_encode %{
9168 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9169 // to do that and the compiler is using that register as one it can allocate.
9170 // So we build it all by hand.
9171 // Address index(noreg, switch_reg, Address::times_1);
9172 // ArrayAddress dispatch(table, index);
9173 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9174 __ lea($dest$$Register, $constantaddress);
9175 __ jmp(dispatch);
9176 %}
9177 ins_pipe(pipe_jmp);
9178 %}
9179
9180 // Conditional move
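// When the CMove picks between the constants 1 (the false value) and 0 (the true
// value), the result is just the negation of the condition, so it is produced by a
// single setb on the negated condition rather than by loading both constants and
// using cmov; hence the reduced cost of 100 versus 200.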
9181 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9182 %{
9183 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9184 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9185
9186 ins_cost(100); // XXX
9187 format %{ "setbn$cop $dst\t# signed, int" %}
9188 ins_encode %{
9189 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9190 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9191 %}
9192 ins_pipe(ialu_reg);
9193 %}
9194
9195 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9196 %{
9197 predicate(!UseAPX);
9198 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9199
9200 ins_cost(200); // XXX
9201 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9202 ins_encode %{
9203 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9204 %}
9205 ins_pipe(pipe_cmov_reg);
9206 %}
9207
9208 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9209 %{
9210 predicate(UseAPX);
9211 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9212
9213 ins_cost(200);
9214 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9215 ins_encode %{
9216 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9217 %}
9218 ins_pipe(pipe_cmov_reg);
9219 %}
9220
9221 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9222 %{
9223 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9224 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9225
9226 ins_cost(100); // XXX
9227 format %{ "setbn$cop $dst\t# unsigned, int" %}
9228 ins_encode %{
9229 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9230 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9231 %}
9232 ins_pipe(ialu_reg);
9233 %}
9234
9235 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9236 predicate(!UseAPX);
9237 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9238
9239 ins_cost(200); // XXX
9240 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9241 ins_encode %{
9242 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9243 %}
9244 ins_pipe(pipe_cmov_reg);
9245 %}
9246
9247 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9248 predicate(UseAPX);
9249 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9250
9251 ins_cost(200);
9252 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9253 ins_encode %{
9254 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9255 %}
9256 ins_pipe(pipe_cmov_reg);
9257 %}
9258
9259 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9260 %{
9261 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9262 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9263
9264 ins_cost(100); // XXX
9265 format %{ "setbn$cop $dst\t# unsigned, int" %}
9266 ins_encode %{
9267 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9268 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9269 %}
9270 ins_pipe(ialu_reg);
9271 %}
9272
9273 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9274 %{
9275 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9276 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9277
9278 ins_cost(100); // XXX
9279 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9280 ins_encode %{
9281 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9282 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9283 %}
9284 ins_pipe(ialu_reg);
9285 %}
9286
9287 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9288 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9289
9290 ins_cost(200);
9291 expand %{
9292 cmovI_regU(cop, cr, dst, src);
9293 %}
9294 %}
9295
9296 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9297 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9298
9299 ins_cost(200);
9300 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9301 ins_encode %{
9302 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9303 %}
9304 ins_pipe(pipe_cmov_reg);
9305 %}
9306
9307 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9308 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9309 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9310
9311 ins_cost(200); // XXX
9312 format %{ "cmovpl $dst, $src\n\t"
9313 "cmovnel $dst, $src" %}
9314 ins_encode %{
9315 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9316 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9317 %}
9318 ins_pipe(pipe_cmov_reg);
9319 %}
9320
9321 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9322 // inputs of the CMove
9323 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9324 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9325 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9326 effect(TEMP dst);
9327
9328 ins_cost(200); // XXX
9329 format %{ "cmovpl $dst, $src\n\t"
9330 "cmovnel $dst, $src" %}
9331 ins_encode %{
9332 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9333 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9334 %}
9335 ins_pipe(pipe_cmov_reg);
9336 %}
9337
9338 // Conditional move
9339 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9340 predicate(!UseAPX);
9341 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9342
9343 ins_cost(250); // XXX
9344 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9345 ins_encode %{
9346 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9347 %}
9348 ins_pipe(pipe_cmov_mem);
9349 %}
9350
9351 // Conditional move
9352 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9353 %{
9354 predicate(UseAPX);
9355 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9356
9357 ins_cost(250);
9358 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9359 ins_encode %{
9360 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9361 %}
9362 ins_pipe(pipe_cmov_mem);
9363 %}
9364
9365 // Conditional move
9366 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9367 %{
9368 predicate(!UseAPX);
9369 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9370
9371 ins_cost(250); // XXX
9372 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9373 ins_encode %{
9374 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9375 %}
9376 ins_pipe(pipe_cmov_mem);
9377 %}
9378
9379 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9380 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9381
9382 ins_cost(250);
9383 expand %{
9384 cmovI_memU(cop, cr, dst, src);
9385 %}
9386 %}
9387
9388 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9389 %{
9390 predicate(UseAPX);
9391 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9392
9393 ins_cost(250);
9394 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9395 ins_encode %{
9396 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9397 %}
9398 ins_pipe(pipe_cmov_mem);
9399 %}
9400
9401 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9402 %{
9403 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9404
9405 ins_cost(250);
9406 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9407 ins_encode %{
9408 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9409 %}
9410 ins_pipe(pipe_cmov_mem);
9411 %}
9412
9413 // Conditional move
9414 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9415 %{
9416 predicate(!UseAPX);
9417 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9418
9419 ins_cost(200); // XXX
9420 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9421 ins_encode %{
9422 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9423 %}
9424 ins_pipe(pipe_cmov_reg);
9425 %}
9426
9427 // Conditional move ndd
9428 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9429 %{
9430 predicate(UseAPX);
9431 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9432
9433 ins_cost(200);
9434 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9435 ins_encode %{
9436 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9437 %}
9438 ins_pipe(pipe_cmov_reg);
9439 %}
9440
9441 // Conditional move
9442 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9443 %{
9444 predicate(!UseAPX);
9445 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9446
9447 ins_cost(200); // XXX
9448 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9449 ins_encode %{
9450 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9451 %}
9452 ins_pipe(pipe_cmov_reg);
9453 %}
9454
9455 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9456 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9457
9458 ins_cost(200);
9459 expand %{
9460 cmovN_regU(cop, cr, dst, src);
9461 %}
9462 %}
9463
9464 // Conditional move ndd
9465 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9466 %{
9467 predicate(UseAPX);
9468 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9469
9470 ins_cost(200);
9471 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9472 ins_encode %{
9473 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9474 %}
9475 ins_pipe(pipe_cmov_reg);
9476 %}
9477
9478 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9479 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9480
9481 ins_cost(200);
9482 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9483 ins_encode %{
9484 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9485 %}
9486 ins_pipe(pipe_cmov_reg);
9487 %}
9488
9489 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9490 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9491 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9492
9493 ins_cost(200); // XXX
9494 format %{ "cmovpl $dst, $src\n\t"
9495 "cmovnel $dst, $src" %}
9496 ins_encode %{
9497 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9498 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9499 %}
9500 ins_pipe(pipe_cmov_reg);
9501 %}
9502
9503 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9504 // inputs of the CMove
9505 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9506 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9507 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9508
9509 ins_cost(200); // XXX
9510 format %{ "cmovpl $dst, $src\n\t"
9511 "cmovnel $dst, $src" %}
9512 ins_encode %{
9513 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9514 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9515 %}
9516 ins_pipe(pipe_cmov_reg);
9517 %}
9518
9519 // Conditional move
9520 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9521 %{
9522 predicate(!UseAPX);
9523 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9524
9525 ins_cost(200); // XXX
9526 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9527 ins_encode %{
9528 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9529 %}
9530 ins_pipe(pipe_cmov_reg); // XXX
9531 %}
9532
9533 // Conditional move ndd
9534 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9535 %{
9536 predicate(UseAPX);
9537 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9538
9539 ins_cost(200);
9540 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9541 ins_encode %{
9542 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9543 %}
9544 ins_pipe(pipe_cmov_reg);
9545 %}
9546
9547 // Conditional move
9548 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9549 %{
9550 predicate(!UseAPX);
9551 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9552
9553 ins_cost(200); // XXX
9554 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9555 ins_encode %{
9556 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9557 %}
9558 ins_pipe(pipe_cmov_reg); // XXX
9559 %}
9560
9561 // Conditional move ndd
9562 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9563 %{
9564 predicate(UseAPX);
9565 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9566
9567 ins_cost(200);
9568 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9569 ins_encode %{
9570 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9571 %}
9572 ins_pipe(pipe_cmov_reg);
9573 %}
9574
9575 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9576 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9577
9578 ins_cost(200);
9579 expand %{
9580 cmovP_regU(cop, cr, dst, src);
9581 %}
9582 %}
9583
9584 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9585 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9586
9587 ins_cost(200);
9588 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9589 ins_encode %{
9590 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9591 %}
9592 ins_pipe(pipe_cmov_reg);
9593 %}
9594
9595 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9596 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9597 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9598
9599 ins_cost(200); // XXX
9600 format %{ "cmovpq $dst, $src\n\t"
9601 "cmovneq $dst, $src" %}
9602 ins_encode %{
9603 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9604 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9605 %}
9606 ins_pipe(pipe_cmov_reg);
9607 %}
9608
9609 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9610 // inputs of the CMove
9611 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9612 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9613 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9614
9615 ins_cost(200); // XXX
9616 format %{ "cmovpq $dst, $src\n\t"
9617 "cmovneq $dst, $src" %}
9618 ins_encode %{
9619 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9620 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9621 %}
9622 ins_pipe(pipe_cmov_reg);
9623 %}
9624
9625 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9626 %{
9627 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9628 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9629
9630 ins_cost(100); // XXX
9631 format %{ "setbn$cop $dst\t# signed, long" %}
9632 ins_encode %{
9633 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9634 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9635 %}
9636 ins_pipe(ialu_reg);
9637 %}
9638
9639 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9640 %{
9641 predicate(!UseAPX);
9642 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9643
9644 ins_cost(200); // XXX
9645 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9646 ins_encode %{
9647 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9648 %}
9649 ins_pipe(pipe_cmov_reg); // XXX
9650 %}
9651
9652 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9653 %{
9654 predicate(UseAPX);
9655 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9656
9657 ins_cost(200);
9658 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9659 ins_encode %{
9660 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9661 %}
9662 ins_pipe(pipe_cmov_reg);
9663 %}
9664
9665 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9666 %{
9667 predicate(!UseAPX);
9668 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9669
9670 ins_cost(200); // XXX
9671 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9672 ins_encode %{
9673 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9674 %}
9675 ins_pipe(pipe_cmov_mem); // XXX
9676 %}
9677
9678 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9679 %{
9680 predicate(UseAPX);
9681 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9682
9683 ins_cost(200);
9684 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9685 ins_encode %{
9686 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9687 %}
9688 ins_pipe(pipe_cmov_mem);
9689 %}
9690
9691 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9692 %{
9693 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9694 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9695
9696 ins_cost(100); // XXX
9697 format %{ "setbn$cop $dst\t# unsigned, long" %}
9698 ins_encode %{
9699 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9700 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9701 %}
9702 ins_pipe(ialu_reg);
9703 %}
9704
9705 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9706 %{
9707 predicate(!UseAPX);
9708 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9709
9710 ins_cost(200); // XXX
9711 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9712 ins_encode %{
9713 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9714 %}
9715 ins_pipe(pipe_cmov_reg); // XXX
9716 %}
9717
9718 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9719 %{
9720 predicate(UseAPX);
9721 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9722
9723 ins_cost(200);
9724 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9725 ins_encode %{
9726 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9727 %}
9728 ins_pipe(pipe_cmov_reg);
9729 %}
9730
9731 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9732 %{
9733 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9734 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9735
9736 ins_cost(100); // XXX
9737 format %{ "setbn$cop $dst\t# unsigned, long" %}
9738 ins_encode %{
9739 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9740 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9741 %}
9742 ins_pipe(ialu_reg);
9743 %}
9744
9745 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9746 %{
9747 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9748 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9749
9750 ins_cost(100); // XXX
9751 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9752 ins_encode %{
9753 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9754 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9755 %}
9756 ins_pipe(ialu_reg);
9757 %}
9758
9759 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9760 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9761
9762 ins_cost(200);
9763 expand %{
9764 cmovL_regU(cop, cr, dst, src);
9765 %}
9766 %}
9767
9768 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9769 %{
9770 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9771
9772 ins_cost(200);
9773 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9774 ins_encode %{
9775 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9776 %}
9777 ins_pipe(pipe_cmov_reg);
9778 %}
9779
9780 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9781 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9782 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9783
9784 ins_cost(200); // XXX
9785 format %{ "cmovpq $dst, $src\n\t"
9786 "cmovneq $dst, $src" %}
9787 ins_encode %{
9788 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9789 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9790 %}
9791 ins_pipe(pipe_cmov_reg);
9792 %}
9793
9794 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9795 // inputs of the CMove
9796 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9797 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9798 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9799
9800 ins_cost(200); // XXX
9801 format %{ "cmovpq $dst, $src\n\t"
9802 "cmovneq $dst, $src" %}
9803 ins_encode %{
9804 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9805 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9806 %}
9807 ins_pipe(pipe_cmov_reg);
9808 %}
9809
9810 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9811 %{
9812 predicate(!UseAPX);
9813 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9814
9815 ins_cost(200); // XXX
9816 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9817 ins_encode %{
9818 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9819 %}
9820 ins_pipe(pipe_cmov_mem); // XXX
9821 %}
9822
9823 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9824 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9825
9826 ins_cost(200);
9827 expand %{
9828 cmovL_memU(cop, cr, dst, src);
9829 %}
9830 %}
9831
9832 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9833 %{
9834 predicate(UseAPX);
9835 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9836
9837 ins_cost(200);
9838 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9839 ins_encode %{
9840 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9841 %}
9842 ins_pipe(pipe_cmov_mem);
9843 %}
9844
9845 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9846 %{
9847 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9848
9849 ins_cost(200);
9850 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9851 ins_encode %{
9852 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9853 %}
9854 ins_pipe(pipe_cmov_mem);
9855 %}
9856
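// There is no conditional-move instruction for XMM registers, so CMoveF/CMoveD are
// lowered to a short forward branch around a movss/movsd. The branch takes the
// inverse of the cmov condition, which on x86 is obtained by flipping the low bit
// of the condition code ($cop$$cmpcode ^ 1).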
9857 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9858 %{
9859 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9860
9861 ins_cost(200); // XXX
9862 format %{ "jn$cop skip\t# signed cmove float\n\t"
9863 "movss $dst, $src\n"
9864 "skip:" %}
9865 ins_encode %{
9866 Label Lskip;
9867 // Invert sense of branch from sense of CMOV
9868 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9869 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9870 __ bind(Lskip);
9871 %}
9872 ins_pipe(pipe_slow);
9873 %}
9874
9875 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9876 %{
9877 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9878
9879 ins_cost(200); // XXX
9880 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9881 "movss $dst, $src\n"
9882 "skip:" %}
9883 ins_encode %{
9884 Label Lskip;
9885 // Invert sense of branch from sense of CMOV
9886 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9887 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9888 __ bind(Lskip);
9889 %}
9890 ins_pipe(pipe_slow);
9891 %}
9892
9893 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9894 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9895
9896 ins_cost(200);
9897 expand %{
9898 cmovF_regU(cop, cr, dst, src);
9899 %}
9900 %}
9901
9902 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9903 %{
9904 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9905
9906 ins_cost(200); // XXX
9907 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9908 "movss $dst, $src\n"
9909 "skip:" %}
9910 ins_encode %{
9911 Label Lskip;
9912 // Invert sense of branch from sense of CMOV
9913 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9914 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9915 __ bind(Lskip);
9916 %}
9917 ins_pipe(pipe_slow);
9918 %}
9919
9920 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9921 %{
9922 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9923
9924 ins_cost(200); // XXX
9925 format %{ "jn$cop skip\t# signed cmove double\n\t"
9926 "movsd $dst, $src\n"
9927 "skip:" %}
9928 ins_encode %{
9929 Label Lskip;
9930 // Invert sense of branch from sense of CMOV
9931 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9932 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9933 __ bind(Lskip);
9934 %}
9935 ins_pipe(pipe_slow);
9936 %}
9937
9938 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9939 %{
9940 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9941
9942 ins_cost(200); // XXX
9943 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9944 "movsd $dst, $src\n"
9945 "skip:" %}
9946 ins_encode %{
9947 Label Lskip;
9948 // Invert sense of branch from sense of CMOV
9949 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9950 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9951 __ bind(Lskip);
9952 %}
9953 ins_pipe(pipe_slow);
9954 %}
9955
9956 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9957 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9958
9959 ins_cost(200);
9960 expand %{
9961 cmovD_regU(cop, cr, dst, src);
9962 %}
9963 %}
9964
9965 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9966 %{
9967 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9968
9969 ins_cost(200); // XXX
9970 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9971 "movsd $dst, $src\n"
9972 "skip:" %}
9973 ins_encode %{
9974 Label Lskip;
9975 // Invert sense of branch from sense of CMOV
9976 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9977 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9978 __ bind(Lskip);
9979 %}
9980 ins_pipe(pipe_slow);
9981 %}
9982
9983 //----------Arithmetic Instructions--------------------------------------------
9984 //----------Addition Instructions----------------------------------------------
9985
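// Each arithmetic pattern below exists in a legacy two-operand form (dst doubles as
// the first source) and, under UseAPX, an NDD (new data destination) form such as
// eaddl dst, src1, src2 that uses the APX extended encoding to write a separate
// destination. The Flag_ndd_demotable_opr* hints mark sources for which the NDD
// form can still be demoted to the shorter legacy encoding if dst ends up allocated
// to that source; the trailing 'false' passed to the e*-emitters presumably selects
// the flag-updating rather than the APX no-flags (NF) variant, consistent with the
// KILL cr effect.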
9986 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9987 %{
9988 predicate(!UseAPX);
9989 match(Set dst (AddI dst src));
9990 effect(KILL cr);
9991 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9992 format %{ "addl $dst, $src\t# int" %}
9993 ins_encode %{
9994 __ addl($dst$$Register, $src$$Register);
9995 %}
9996 ins_pipe(ialu_reg_reg);
9997 %}
9998
9999 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10000 %{
10001 predicate(UseAPX);
10002 match(Set dst (AddI src1 src2));
10003 effect(KILL cr);
10004 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10005
10006 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10007 ins_encode %{
10008 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
10009 %}
10010 ins_pipe(ialu_reg_reg);
10011 %}
10012
10013 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10014 %{
10015 predicate(!UseAPX);
10016 match(Set dst (AddI dst src));
10017 effect(KILL cr);
10018 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10019
10020 format %{ "addl $dst, $src\t# int" %}
10021 ins_encode %{
10022 __ addl($dst$$Register, $src$$constant);
10023 %}
10024 ins_pipe( ialu_reg );
10025 %}
10026
10027 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10028 %{
10029 predicate(UseAPX);
10030 match(Set dst (AddI src1 src2));
10031 effect(KILL cr);
10032 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10033
10034 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10035 ins_encode %{
10036 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
10037 %}
10038 ins_pipe( ialu_reg );
10039 %}
10040
10041 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10042 %{
10043 predicate(UseAPX);
10044 match(Set dst (AddI (LoadI src1) src2));
10045 effect(KILL cr);
10046 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10047
10048 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10049 ins_encode %{
10050 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10051 %}
10052 ins_pipe( ialu_reg );
10053 %}
10054
10055 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10056 %{
10057 predicate(!UseAPX);
10058 match(Set dst (AddI dst (LoadI src)));
10059 effect(KILL cr);
10060 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10061
10062 ins_cost(150); // XXX
10063 format %{ "addl $dst, $src\t# int" %}
10064 ins_encode %{
10065 __ addl($dst$$Register, $src$$Address);
10066 %}
10067 ins_pipe(ialu_reg_mem);
10068 %}
10069
10070 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10071 %{
10072 predicate(UseAPX);
10073 match(Set dst (AddI src1 (LoadI src2)));
10074 effect(KILL cr);
10075 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10076
10077 ins_cost(150);
10078 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10079 ins_encode %{
10080 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10081 %}
10082 ins_pipe(ialu_reg_mem);
10083 %}
10084
10085 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10086 %{
10087 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10088 effect(KILL cr);
10089 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10090
10091 ins_cost(150); // XXX
10092 format %{ "addl $dst, $src\t# int" %}
10093 ins_encode %{
10094 __ addl($dst$$Address, $src$$Register);
10095 %}
10096 ins_pipe(ialu_mem_reg);
10097 %}
10098
10099 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10100 %{
10101 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10102 effect(KILL cr);
10103 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10104
10106 ins_cost(125); // XXX
10107 format %{ "addl $dst, $src\t# int" %}
10108 ins_encode %{
10109 __ addl($dst$$Address, $src$$constant);
10110 %}
10111 ins_pipe(ialu_mem_imm);
10112 %}
10113
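// Under UseIncDec an add of +/-1 is emitted as incl/decl, which has a shorter
// encoding than addl with an immediate. Unlike addl, incl/decl leave CF untouched,
// which is why these patterns carry no flag(...) declaration while the plain add
// forms above advertise the full set of flag effects.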
10114 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10115 %{
10116 predicate(!UseAPX && UseIncDec);
10117 match(Set dst (AddI dst src));
10118 effect(KILL cr);
10119
10120 format %{ "incl $dst\t# int" %}
10121 ins_encode %{
10122 __ incrementl($dst$$Register);
10123 %}
10124 ins_pipe(ialu_reg);
10125 %}
10126
10127 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10128 %{
10129 predicate(UseAPX && UseIncDec);
10130 match(Set dst (AddI src val));
10131 effect(KILL cr);
10132 flag(PD::Flag_ndd_demotable_opr1);
10133
10134 format %{ "eincl $dst, $src\t# int ndd" %}
10135 ins_encode %{
10136 __ eincl($dst$$Register, $src$$Register, false);
10137 %}
10138 ins_pipe(ialu_reg);
10139 %}
10140
10141 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10142 %{
10143 predicate(UseAPX && UseIncDec);
10144 match(Set dst (AddI (LoadI src) val));
10145 effect(KILL cr);
10146
10147 format %{ "eincl $dst, $src\t# int ndd" %}
10148 ins_encode %{
10149 __ eincl($dst$$Register, $src$$Address, false);
10150 %}
10151 ins_pipe(ialu_reg);
10152 %}
10153
10154 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10155 %{
10156 predicate(UseIncDec);
10157 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10158 effect(KILL cr);
10159
10160 ins_cost(125); // XXX
10161 format %{ "incl $dst\t# int" %}
10162 ins_encode %{
10163 __ incrementl($dst$$Address);
10164 %}
10165 ins_pipe(ialu_mem_imm);
10166 %}
10167
10168 // XXX why does that use AddI
10169 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10170 %{
10171 predicate(!UseAPX && UseIncDec);
10172 match(Set dst (AddI dst src));
10173 effect(KILL cr);
10174
10175 format %{ "decl $dst\t# int" %}
10176 ins_encode %{
10177 __ decrementl($dst$$Register);
10178 %}
10179 ins_pipe(ialu_reg);
10180 %}
10181
10182 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10183 %{
10184 predicate(UseAPX && UseIncDec);
10185 match(Set dst (AddI src val));
10186 effect(KILL cr);
10187 flag(PD::Flag_ndd_demotable_opr1);
10188
10189 format %{ "edecl $dst, $src\t# int ndd" %}
10190 ins_encode %{
10191 __ edecl($dst$$Register, $src$$Register, false);
10192 %}
10193 ins_pipe(ialu_reg);
10194 %}
10195
10196 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10197 %{
10198 predicate(UseAPX && UseIncDec);
10199 match(Set dst (AddI (LoadI src) val));
10200 effect(KILL cr);
10201
10202 format %{ "edecl $dst, $src\t# int ndd" %}
10203 ins_encode %{
10204 __ edecl($dst$$Register, $src$$Address, false);
10205 %}
10206 ins_pipe(ialu_reg);
10207 %}
10208
10209 // XXX why does that use AddI
10210 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10211 %{
10212 predicate(UseIncDec);
10213 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10214 effect(KILL cr);
10215
10216 ins_cost(125); // XXX
10217 format %{ "decl $dst\t# int" %}
10218 ins_encode %{
10219 __ decrementl($dst$$Address);
10220 %}
10221 ins_pipe(ialu_mem_imm);
10222 %}
10223
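// When the CPU reports fast 2- or 3-component lea, an AddI of a shifted index
// and/or displacement is folded into a single leal. Unlike addl, lea does not write
// EFLAGS, so these patterns need no KILL cr effect and can be scheduled more freely
// around flag users.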
10224 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10225 %{
10226 predicate(VM_Version::supports_fast_2op_lea());
10227 match(Set dst (AddI (LShiftI index scale) disp));
10228
10229 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10230 ins_encode %{
10231 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10232 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10233 %}
10234 ins_pipe(ialu_reg_reg);
10235 %}
10236
10237 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10238 %{
10239 predicate(VM_Version::supports_fast_3op_lea());
10240 match(Set dst (AddI (AddI base index) disp));
10241
10242 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10243 ins_encode %{
10244 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10245 %}
10246 ins_pipe(ialu_reg_reg);
10247 %}
10248
10249 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10250 %{
10251 predicate(VM_Version::supports_fast_2op_lea());
10252 match(Set dst (AddI base (LShiftI index scale)));
10253
10254 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10255 ins_encode %{
10256 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10257 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10258 %}
10259 ins_pipe(ialu_reg_reg);
10260 %}
10261
10262 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10263 %{
10264 predicate(VM_Version::supports_fast_3op_lea());
10265 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10266
10267 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10268 ins_encode %{
10269 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10270 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10271 %}
10272 ins_pipe(ialu_reg_reg);
10273 %}
10274
10275 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10276 %{
10277 predicate(!UseAPX);
10278 match(Set dst (AddL dst src));
10279 effect(KILL cr);
10280 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10281
10282 format %{ "addq $dst, $src\t# long" %}
10283 ins_encode %{
10284 __ addq($dst$$Register, $src$$Register);
10285 %}
10286 ins_pipe(ialu_reg_reg);
10287 %}
10288
10289 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10290 %{
10291 predicate(UseAPX);
10292 match(Set dst (AddL src1 src2));
10293 effect(KILL cr);
10294 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10295
10296 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10297 ins_encode %{
10298 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10299 %}
10300 ins_pipe(ialu_reg_reg);
10301 %}
10302
10303 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10304 %{
10305 predicate(!UseAPX);
10306 match(Set dst (AddL dst src));
10307 effect(KILL cr);
10308 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10309
10310 format %{ "addq $dst, $src\t# long" %}
10311 ins_encode %{
10312 __ addq($dst$$Register, $src$$constant);
10313 %}
10314 ins_pipe( ialu_reg );
10315 %}
10316
10317 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10318 %{
10319 predicate(UseAPX);
10320 match(Set dst (AddL src1 src2));
10321 effect(KILL cr);
10322 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10323
10324 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10325 ins_encode %{
10326 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10327 %}
10328 ins_pipe( ialu_reg );
10329 %}
10330
10331 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10332 %{
10333 predicate(UseAPX);
10334 match(Set dst (AddL (LoadL src1) src2));
10335 effect(KILL cr);
10336 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10337
10338 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10339 ins_encode %{
10340 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10341 %}
10342 ins_pipe( ialu_reg );
10343 %}
10344
10345 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10346 %{
10347 predicate(!UseAPX);
10348 match(Set dst (AddL dst (LoadL src)));
10349 effect(KILL cr);
10350 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10351
10352 ins_cost(150); // XXX
10353 format %{ "addq $dst, $src\t# long" %}
10354 ins_encode %{
10355 __ addq($dst$$Register, $src$$Address);
10356 %}
10357 ins_pipe(ialu_reg_mem);
10358 %}
10359
10360 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10361 %{
10362 predicate(UseAPX);
10363 match(Set dst (AddL src1 (LoadL src2)));
10364 effect(KILL cr);
10365 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10366
10367 ins_cost(150);
10368 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10369 ins_encode %{
10370 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10371 %}
10372 ins_pipe(ialu_reg_mem);
10373 %}
10374
10375 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10376 %{
10377 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10378 effect(KILL cr);
10379 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10380
10381 ins_cost(150); // XXX
10382 format %{ "addq $dst, $src\t# long" %}
10383 ins_encode %{
10384 __ addq($dst$$Address, $src$$Register);
10385 %}
10386 ins_pipe(ialu_mem_reg);
10387 %}
10388
10389 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10390 %{
10391 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10392 effect(KILL cr);
10393 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10394
10395 ins_cost(125); // XXX
10396 format %{ "addq $dst, $src\t# long" %}
10397 ins_encode %{
10398 __ addq($dst$$Address, $src$$constant);
10399 %}
10400 ins_pipe(ialu_mem_imm);
10401 %}
10402
10403 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10404 %{
10405 predicate(!UseAPX && UseIncDec);
10406 match(Set dst (AddL dst src));
10407 effect(KILL cr);
10408
10409 format %{ "incq $dst\t# long" %}
10410 ins_encode %{
10411 __ incrementq($dst$$Register);
10412 %}
10413 ins_pipe(ialu_reg);
10414 %}
10415
10416 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)

10417 %{
10418 predicate(UseAPX && UseIncDec);
10419 match(Set dst (AddL src val));
10420 effect(KILL cr);
10421 flag(PD::Flag_ndd_demotable_opr1);
10422
10423 format %{ "eincq $dst, $src\t# long ndd" %}
10424 ins_encode %{
10425 __ eincq($dst$$Register, $src$$Register, false);
10426 %}
10427 ins_pipe(ialu_reg);
10428 %}
10429
10430 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10431 %{
10432 predicate(UseAPX && UseIncDec);
10433 match(Set dst (AddL (LoadL src) val));
10434 effect(KILL cr);
10435
10436 format %{ "eincq $dst, $src\t# long ndd" %}
10437 ins_encode %{
10438 __ eincq($dst$$Register, $src$$Address, false);
10439 %}
10440 ins_pipe(ialu_reg);
10441 %}
10442
10443 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10444 %{
10445 predicate(UseIncDec);
10446 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10447 effect(KILL cr);
10448
10449 ins_cost(125); // XXX
10450 format %{ "incq $dst\t# long" %}
10451 ins_encode %{
10452 __ incrementq($dst$$Address);
10453 %}
10454 ins_pipe(ialu_mem_imm);
10455 %}
10456
10457 // XXX why does that use AddL
10458 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10459 %{
10460 predicate(!UseAPX && UseIncDec);
10461 match(Set dst (AddL dst src));
10462 effect(KILL cr);
10463
10464 format %{ "decq $dst\t# long" %}
10465 ins_encode %{
10466 __ decrementq($dst$$Register);
10467 %}
10468 ins_pipe(ialu_reg);
10469 %}
10470
10471 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10472 %{
10473 predicate(UseAPX && UseIncDec);
10474 match(Set dst (AddL src val));
10475 effect(KILL cr);
10476 flag(PD::Flag_ndd_demotable_opr1);
10477
10478 format %{ "edecq $dst, $src\t# long ndd" %}
10479 ins_encode %{
10480 __ edecq($dst$$Register, $src$$Register, false);
10481 %}
10482 ins_pipe(ialu_reg);
10483 %}
10484
10485 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10486 %{
10487 predicate(UseAPX && UseIncDec);
10488 match(Set dst (AddL (LoadL src) val));
10489 effect(KILL cr);
10490
10491 format %{ "edecq $dst, $src\t# long ndd" %}
10492 ins_encode %{
10493 __ edecq($dst$$Register, $src$$Address, false);
10494 %}
10495 ins_pipe(ialu_reg);
10496 %}
10497
10498 // XXX why does that use AddL
10499 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10500 %{
10501 predicate(UseIncDec);
10502 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10503 effect(KILL cr);
10504
10505 ins_cost(125); // XXX
10506 format %{ "decq $dst\t# long" %}
10507 ins_encode %{
10508 __ decrementq($dst$$Address);
10509 %}
10510 ins_pipe(ialu_mem_imm);
10511 %}
10512
10513 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10514 %{
10515 predicate(VM_Version::supports_fast_2op_lea());
10516 match(Set dst (AddL (LShiftL index scale) disp));
10517
10518 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10519 ins_encode %{
10520 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10521 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10522 %}
10523 ins_pipe(ialu_reg_reg);
10524 %}
10525
10526 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10527 %{
10528 predicate(VM_Version::supports_fast_3op_lea());
10529 match(Set dst (AddL (AddL base index) disp));
10530
10531 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10532 ins_encode %{
10533 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10534 %}
10535 ins_pipe(ialu_reg_reg);
10536 %}
10537
10538 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10539 %{
10540 predicate(VM_Version::supports_fast_2op_lea());
10541 match(Set dst (AddL base (LShiftL index scale)));
10542
10543 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10544 ins_encode %{
10545 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10546 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10547 %}
10548 ins_pipe(ialu_reg_reg);
10549 %}
10550
10551 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10552 %{
10553 predicate(VM_Version::supports_fast_3op_lea());
10554 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10555
10556 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10557 ins_encode %{
10558 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10559 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10560 %}
10561 ins_pipe(ialu_reg_reg);
10562 %}
10563
10564 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10565 %{
10566 match(Set dst (AddP dst src));
10567 effect(KILL cr);
10568 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10569
10570 format %{ "addq $dst, $src\t# ptr" %}
10571 ins_encode %{
10572 __ addq($dst$$Register, $src$$Register);
10573 %}
10574 ins_pipe(ialu_reg_reg);
10575 %}
10576
10577 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10578 %{
10579 match(Set dst (AddP dst src));
10580 effect(KILL cr);
10581 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10582
10583 format %{ "addq $dst, $src\t# ptr" %}
10584 ins_encode %{
10585 __ addq($dst$$Register, $src$$constant);
10586 %}
10587 ins_pipe( ialu_reg );
10588 %}
10589
10590 // XXX addP mem ops ????
10591
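// CheckCastPP and the Cast{PP,II,LL,FF,HH,DD} nodes only refine the type seen by
// the optimizer; they normally emit no code (size(0), empty encoding). When
// VerifyConstraintCasts > 0, the int/long variants instead emit a runtime range
// check via verify_int_in_range / verify_long_in_range.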
10592 instruct checkCastPP(rRegP dst)
10593 %{
10594 match(Set dst (CheckCastPP dst));
10595
10596 size(0);
10597 format %{ "# checkcastPP of $dst" %}
10598 ins_encode(/* empty encoding */);
10599 ins_pipe(empty);
10600 %}
10601
10602 instruct castPP(rRegP dst)
10603 %{
10604 match(Set dst (CastPP dst));
10605
10606 size(0);
10607 format %{ "# castPP of $dst" %}
10608 ins_encode(/* empty encoding */);
10609 ins_pipe(empty);
10610 %}
10611
10612 instruct castII(rRegI dst)
10613 %{
10614 predicate(VerifyConstraintCasts == 0);
10615 match(Set dst (CastII dst));
10616
10617 size(0);
10618 format %{ "# castII of $dst" %}
10619 ins_encode(/* empty encoding */);
10620 ins_cost(0);
10621 ins_pipe(empty);
10622 %}
10623
10624 instruct castII_checked(rRegI dst, rFlagsReg cr)
10625 %{
10626 predicate(VerifyConstraintCasts > 0);
10627 match(Set dst (CastII dst));
10628
10629 effect(KILL cr);
10630 format %{ "# cast_checked_II $dst" %}
10631 ins_encode %{
10632 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10633 %}
10634 ins_pipe(pipe_slow);
10635 %}
10636
10637 instruct castLL(rRegL dst)
10638 %{
10639 predicate(VerifyConstraintCasts == 0);
10640 match(Set dst (CastLL dst));
10641
10642 size(0);
10643 format %{ "# castLL of $dst" %}
10644 ins_encode(/* empty encoding */);
10645 ins_cost(0);
10646 ins_pipe(empty);
10647 %}
10648
10649 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10650 %{
10651 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10652 match(Set dst (CastLL dst));
10653
10654 effect(KILL cr);
10655 format %{ "# cast_checked_LL $dst" %}
10656 ins_encode %{
10657 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10658 %}
10659 ins_pipe(pipe_slow);
10660 %}
10661
10662 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10663 %{
10664 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10665 match(Set dst (CastLL dst));
10666
10667 effect(KILL cr, TEMP tmp);
10668 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10669 ins_encode %{
10670 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10671 %}
10672 ins_pipe(pipe_slow);
10673 %}
10674
10675 instruct castFF(regF dst)
10676 %{
10677 match(Set dst (CastFF dst));
10678
10679 size(0);
10680 format %{ "# castFF of $dst" %}
10681 ins_encode(/* empty encoding */);
10682 ins_cost(0);
10683 ins_pipe(empty);
10684 %}
10685
10686 instruct castHH(regF dst)
10687 %{
10688 match(Set dst (CastHH dst));
10689
10690 size(0);
10691 format %{ "# castHH of $dst" %}
10692 ins_encode(/* empty encoding */);
10693 ins_cost(0);
10694 ins_pipe(empty);
10695 %}
10696
10697 instruct castDD(regD dst)
10698 %{
10699 match(Set dst (CastDD dst));
10700
10701 size(0);
10702 format %{ "# castDD of $dst" %}
10703 ins_encode(/* empty encoding */);
10704 ins_cost(0);
10705 ins_pipe(empty);
10706 %}
10707
10708 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
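// CompareAndSwap*: lock cmpxchg compares rax (oldval) with the memory operand and
// stores newval on a match; the comparison outcome is then materialized into $res
// with setcc. CompareAndExchange* instead yields the old memory value, which
// cmpxchg leaves in rax, so no setcc is needed.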
10709 instruct compareAndSwapP(rRegI res,
10710 memory mem_ptr,
10711 rax_RegP oldval, rRegP newval,
10712 rFlagsReg cr)
10713 %{
10714 predicate(n->as_LoadStore()->barrier_data() == 0);
10715 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10716 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10717 effect(KILL cr, KILL oldval);
10718
10719 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10720 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10721 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10722 ins_encode %{
10723 __ lock();
10724 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10725 __ setcc(Assembler::equal, $res$$Register);
10726 %}
10727 ins_pipe( pipe_cmpxchg );
10728 %}
10729
10730 instruct compareAndSwapL(rRegI res,
10731 memory mem_ptr,
10732 rax_RegL oldval, rRegL newval,
10733 rFlagsReg cr)
10734 %{
10735 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10736 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10737 effect(KILL cr, KILL oldval);
10738
10739 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10740 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10741 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10742 ins_encode %{
10743 __ lock();
10744 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10745 __ setcc(Assembler::equal, $res$$Register);
10746 %}
10747 ins_pipe( pipe_cmpxchg );
10748 %}
10749
10750 instruct compareAndSwapI(rRegI res,
10751 memory mem_ptr,
10752 rax_RegI oldval, rRegI newval,
10753 rFlagsReg cr)
10754 %{
10755 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10756 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10757 effect(KILL cr, KILL oldval);
10758
10759 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10760 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10761 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10762 ins_encode %{
10763 __ lock();
10764 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10765 __ setcc(Assembler::equal, $res$$Register);
10766 %}
10767 ins_pipe( pipe_cmpxchg );
10768 %}
10769
10770 instruct compareAndSwapB(rRegI res,
10771 memory mem_ptr,
10772 rax_RegI oldval, rRegI newval,
10773 rFlagsReg cr)
10774 %{
10775 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10776 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10777 effect(KILL cr, KILL oldval);
10778
10779 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10780 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10781 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10782 ins_encode %{
10783 __ lock();
10784 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10785 __ setcc(Assembler::equal, $res$$Register);
10786 %}
10787 ins_pipe( pipe_cmpxchg );
10788 %}
10789
10790 instruct compareAndSwapS(rRegI res,
10791 memory mem_ptr,
10792 rax_RegI oldval, rRegI newval,
10793 rFlagsReg cr)
10794 %{
10795 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10796 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10797 effect(KILL cr, KILL oldval);
10798
10799 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10800 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10801 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10802 ins_encode %{
10803 __ lock();
10804 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10805 __ setcc(Assembler::equal, $res$$Register);
10806 %}
10807 ins_pipe( pipe_cmpxchg );
10808 %}
10809
10810 instruct compareAndSwapN(rRegI res,
10811 memory mem_ptr,
10812 rax_RegN oldval, rRegN newval,
10813 rFlagsReg cr) %{
10814 predicate(n->as_LoadStore()->barrier_data() == 0);
10815 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10816 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10817 effect(KILL cr, KILL oldval);
10818
10819 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10820 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10821 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10822 ins_encode %{
10823 __ lock();
10824 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10825 __ setcc(Assembler::equal, $res$$Register);
10826 %}
10827 ins_pipe( pipe_cmpxchg );
10828 %}
10829
10830 instruct compareAndExchangeB(
10831 memory mem_ptr,
10832 rax_RegI oldval, rRegI newval,
10833 rFlagsReg cr)
10834 %{
10835 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10836 effect(KILL cr);
10837
10838 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10839 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10840 ins_encode %{
10841 __ lock();
10842 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10843 %}
10844 ins_pipe( pipe_cmpxchg );
10845 %}
10846
10847 instruct compareAndExchangeS(
10848 memory mem_ptr,
10849 rax_RegI oldval, rRegI newval,
10850 rFlagsReg cr)
10851 %{
10852 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10853 effect(KILL cr);
10854
10855 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10856 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10857 ins_encode %{
10858 __ lock();
10859 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10860 %}
10861 ins_pipe( pipe_cmpxchg );
10862 %}
10863
10864 instruct compareAndExchangeI(
10865 memory mem_ptr,
10866 rax_RegI oldval, rRegI newval,
10867 rFlagsReg cr)
10868 %{
10869 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10870 effect(KILL cr);
10871
10872 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10873 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10874 ins_encode %{
10875 __ lock();
10876 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10877 %}
10878 ins_pipe( pipe_cmpxchg );
10879 %}
10880
10881 instruct compareAndExchangeL(
10882 memory mem_ptr,
10883 rax_RegL oldval, rRegL newval,
10884 rFlagsReg cr)
10885 %{
10886 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10887 effect(KILL cr);
10888
10889 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10890 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10891 ins_encode %{
10892 __ lock();
10893 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10894 %}
10895 ins_pipe( pipe_cmpxchg );
10896 %}
10897
10898 instruct compareAndExchangeN(
10899 memory mem_ptr,
10900 rax_RegN oldval, rRegN newval,
10901 rFlagsReg cr) %{
10902 predicate(n->as_LoadStore()->barrier_data() == 0);
10903 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10904 effect(KILL cr);
10905
10906 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10907 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10908 ins_encode %{
10909 __ lock();
10910 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10911 %}
10912 ins_pipe( pipe_cmpxchg );
10913 %}
10914
10915 instruct compareAndExchangeP(
10916 memory mem_ptr,
10917 rax_RegP oldval, rRegP newval,
10918 rFlagsReg cr)
10919 %{
10920 predicate(n->as_LoadStore()->barrier_data() == 0);
10921 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10922 effect(KILL cr);
10923
10924 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10925 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10926 ins_encode %{
10927 __ lock();
10928 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10929 %}
10930 ins_pipe( pipe_cmpxchg );
10931 %}
10932
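// GetAndAdd*: when the old value is not consumed (result_not_used()) a plain
// "lock add" suffices; otherwise "lock xadd" is used so the previous memory value
// is returned in the register operand.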
10933 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10934 predicate(n->as_LoadStore()->result_not_used());
10935 match(Set dummy (GetAndAddB mem add));
10936 effect(KILL cr);
10937 format %{ "addb_lock $mem, $add" %}
10938 ins_encode %{
10939 __ lock();
10940 __ addb($mem$$Address, $add$$Register);
10941 %}
10942 ins_pipe(pipe_cmpxchg);
10943 %}
10944
10945 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10946 predicate(n->as_LoadStore()->result_not_used());
10947 match(Set dummy (GetAndAddB mem add));
10948 effect(KILL cr);
10949 format %{ "addb_lock $mem, $add" %}
10950 ins_encode %{
10951 __ lock();
10952 __ addb($mem$$Address, $add$$constant);
10953 %}
10954 ins_pipe(pipe_cmpxchg);
10955 %}
10956
10957 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10958 predicate(!n->as_LoadStore()->result_not_used());
10959 match(Set newval (GetAndAddB mem newval));
10960 effect(KILL cr);
10961 format %{ "xaddb_lock $mem, $newval" %}
10962 ins_encode %{
10963 __ lock();
10964 __ xaddb($mem$$Address, $newval$$Register);
10965 %}
10966 ins_pipe(pipe_cmpxchg);
10967 %}
10968
10969 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10970 predicate(n->as_LoadStore()->result_not_used());
10971 match(Set dummy (GetAndAddS mem add));
10972 effect(KILL cr);
10973 format %{ "addw_lock $mem, $add" %}
10974 ins_encode %{
10975 __ lock();
10976 __ addw($mem$$Address, $add$$Register);
10977 %}
10978 ins_pipe(pipe_cmpxchg);
10979 %}
10980
10981 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10982 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10983 match(Set dummy (GetAndAddS mem add));
10984 effect(KILL cr);
10985 format %{ "addw_lock $mem, $add" %}
10986 ins_encode %{
10987 __ lock();
10988 __ addw($mem$$Address, $add$$constant);
10989 %}
10990 ins_pipe(pipe_cmpxchg);
10991 %}
10992
10993 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10994 predicate(!n->as_LoadStore()->result_not_used());
10995 match(Set newval (GetAndAddS mem newval));
10996 effect(KILL cr);
10997 format %{ "xaddw_lock $mem, $newval" %}
10998 ins_encode %{
10999 __ lock();
11000 __ xaddw($mem$$Address, $newval$$Register);
11001 %}
11002 ins_pipe(pipe_cmpxchg);
11003 %}
11004
11005 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
11006 predicate(n->as_LoadStore()->result_not_used());
11007 match(Set dummy (GetAndAddI mem add));
11008 effect(KILL cr);
11009 format %{ "addl_lock $mem, $add" %}
11010 ins_encode %{
11011 __ lock();
11012 __ addl($mem$$Address, $add$$Register);
11013 %}
11014 ins_pipe(pipe_cmpxchg);
11015 %}
11016
11017 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11018 predicate(n->as_LoadStore()->result_not_used());
11019 match(Set dummy (GetAndAddI mem add));
11020 effect(KILL cr);
11021 format %{ "addl_lock $mem, $add" %}
11022 ins_encode %{
11023 __ lock();
11024 __ addl($mem$$Address, $add$$constant);
11025 %}
11026 ins_pipe(pipe_cmpxchg);
11027 %}
11028
11029 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
11030 predicate(!n->as_LoadStore()->result_not_used());
11031 match(Set newval (GetAndAddI mem newval));
11032 effect(KILL cr);
11033 format %{ "xaddl_lock $mem, $newval" %}
11034 ins_encode %{
11035 __ lock();
11036 __ xaddl($mem$$Address, $newval$$Register);
11037 %}
11038 ins_pipe(pipe_cmpxchg);
11039 %}
11040
11041 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11042 predicate(n->as_LoadStore()->result_not_used());
11043 match(Set dummy (GetAndAddL mem add));
11044 effect(KILL cr);
11045 format %{ "addq_lock $mem, $add" %}
11046 ins_encode %{
11047 __ lock();
11048 __ addq($mem$$Address, $add$$Register);
11049 %}
11050 ins_pipe(pipe_cmpxchg);
11051 %}
11052
11053 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11054 predicate(n->as_LoadStore()->result_not_used());
11055 match(Set dummy (GetAndAddL mem add));
11056 effect(KILL cr);
11057 format %{ "addq_lock $mem, $add" %}
11058 ins_encode %{
11059 __ lock();
11060 __ addq($mem$$Address, $add$$constant);
11061 %}
11062 ins_pipe(pipe_cmpxchg);
11063 %}
11064
11065 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11066 predicate(!n->as_LoadStore()->result_not_used());
11067 match(Set newval (GetAndAddL mem newval));
11068 effect(KILL cr);
11069 format %{ "xaddq_lock $mem, $newval" %}
11070 ins_encode %{
11071 __ lock();
11072 __ xaddq($mem$$Address, $newval$$Register);
11073 %}
11074 ins_pipe(pipe_cmpxchg);
11075 %}
11076
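// GetAndSet*: xchg with a memory operand is implicitly locked on x86, so no
// explicit lock prefix is emitted for these rules.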
11077 instruct xchgB( memory mem, rRegI newval) %{
11078 match(Set newval (GetAndSetB mem newval));
11079 format %{ "XCHGB $newval,[$mem]" %}
11080 ins_encode %{
11081 __ xchgb($newval$$Register, $mem$$Address);
11082 %}
11083 ins_pipe( pipe_cmpxchg );
11084 %}
11085
11086 instruct xchgS( memory mem, rRegI newval) %{
11087 match(Set newval (GetAndSetS mem newval));
11088 format %{ "XCHGW $newval,[$mem]" %}
11089 ins_encode %{
11090 __ xchgw($newval$$Register, $mem$$Address);
11091 %}
11092 ins_pipe( pipe_cmpxchg );
11093 %}
11094
11095 instruct xchgI( memory mem, rRegI newval) %{
11096 match(Set newval (GetAndSetI mem newval));
11097 format %{ "XCHGL $newval,[$mem]" %}
11098 ins_encode %{
11099 __ xchgl($newval$$Register, $mem$$Address);
11100 %}
11101 ins_pipe( pipe_cmpxchg );
11102 %}
11103
11104 instruct xchgL( memory mem, rRegL newval) %{
11105 match(Set newval (GetAndSetL mem newval));
11106 format %{ "XCHGQ $newval,[$mem]" %}
11107 ins_encode %{
11108 __ xchgq($newval$$Register, $mem$$Address);
11109 %}
11110 ins_pipe( pipe_cmpxchg );
11111 %}
11112
11113 instruct xchgP( memory mem, rRegP newval) %{
11114 match(Set newval (GetAndSetP mem newval));
11115 predicate(n->as_LoadStore()->barrier_data() == 0);
11116 format %{ "XCHGQ $newval,[$mem]" %}
11117 ins_encode %{
11118 __ xchgq($newval$$Register, $mem$$Address);
11119 %}
11120 ins_pipe( pipe_cmpxchg );
11121 %}
11122
11123 instruct xchgN( memory mem, rRegN newval) %{
11124 predicate(n->as_LoadStore()->barrier_data() == 0);
11125 match(Set newval (GetAndSetN mem newval));
11126 format %{ "XCHGL $newval,[$mem]" %}
11127 ins_encode %{
11128 __ xchgl($newval$$Register, $mem$$Address);
11129 %}
11130 ins_pipe( pipe_cmpxchg );
11131 %}
11132
11133 //----------Abs Instructions-------------------------------------------
11134
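// Branchless absolute value: the xor zeroes dst, the sub computes -src and sets
// the flags, and the cmov keeps the original src only when -src turned out
// negative (i.e. src > 0), leaving |src| in dst. As with Java's Math.abs,
// MIN_VALUE maps to itself.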
11135 // Integer Absolute Instructions
11136 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11137 %{
11138 match(Set dst (AbsI src));
11139 effect(TEMP dst, KILL cr);
11140 format %{ "xorl $dst, $dst\t# abs int\n\t"
11141 "subl $dst, $src\n\t"
11142 "cmovll $dst, $src" %}
11143 ins_encode %{
11144 __ xorl($dst$$Register, $dst$$Register);
11145 __ subl($dst$$Register, $src$$Register);
11146 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11147 %}
11148
11149 ins_pipe(ialu_reg_reg);
11150 %}
11151
11152 // Long Absolute Instructions
11153 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11154 %{
11155 match(Set dst (AbsL src));
11156 effect(TEMP dst, KILL cr);
11157 format %{ "xorl $dst, $dst\t# abs long\n\t"
11158 "subq $dst, $src\n\t"
11159 "cmovlq $dst, $src" %}
11160 ins_encode %{
11161 __ xorl($dst$$Register, $dst$$Register);
11162 __ subq($dst$$Register, $src$$Register);
11163 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11164 %}
11165
11166 ins_pipe(ialu_reg_reg);
11167 %}
11168
11169 //----------Subtraction Instructions-------------------------------------------
11170
11171 // Integer Subtraction Instructions
11172 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11173 %{
11174 predicate(!UseAPX);
11175 match(Set dst (SubI dst src));
11176 effect(KILL cr);
11177 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178
11179 format %{ "subl $dst, $src\t# int" %}
11180 ins_encode %{
11181 __ subl($dst$$Register, $src$$Register);
11182 %}
11183 ins_pipe(ialu_reg_reg);
11184 %}
11185
11186 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11187 %{
11188 predicate(UseAPX);
11189 match(Set dst (SubI src1 src2));
11190 effect(KILL cr);
11191 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11192
11193 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11194 ins_encode %{
11195 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11196 %}
11197 ins_pipe(ialu_reg_reg);
11198 %}
11199
11200 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11201 %{
11202 predicate(UseAPX);
11203 match(Set dst (SubI src1 src2));
11204 effect(KILL cr);
11205 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11206
11207 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11208 ins_encode %{
11209 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11210 %}
11211 ins_pipe(ialu_reg_reg);
11212 %}
11213
11214 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11215 %{
11216 predicate(UseAPX);
11217 match(Set dst (SubI (LoadI src1) src2));
11218 effect(KILL cr);
11219 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11220
11221 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11222 ins_encode %{
11223 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11224 %}
11225 ins_pipe(ialu_reg_reg);
11226 %}
11227
11228 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11229 %{
11230 predicate(!UseAPX);
11231 match(Set dst (SubI dst (LoadI src)));
11232 effect(KILL cr);
11233 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11234
11235 ins_cost(150);
11236 format %{ "subl $dst, $src\t# int" %}
11237 ins_encode %{
11238 __ subl($dst$$Register, $src$$Address);
11239 %}
11240 ins_pipe(ialu_reg_mem);
11241 %}
11242
11243 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11244 %{
11245 predicate(UseAPX);
11246 match(Set dst (SubI src1 (LoadI src2)));
11247 effect(KILL cr);
11248 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11249
11250 ins_cost(150);
11251 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11252 ins_encode %{
11253 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11254 %}
11255 ins_pipe(ialu_reg_mem);
11256 %}
11257
11258 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11259 %{
11260 predicate(UseAPX);
11261 match(Set dst (SubI (LoadI src1) src2));
11262 effect(KILL cr);
11263 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11264
11265 ins_cost(150);
11266 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11267 ins_encode %{
11268 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11269 %}
11270 ins_pipe(ialu_reg_mem);
11271 %}
11272
11273 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11274 %{
11275 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11276 effect(KILL cr);
11277 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11278
11279 ins_cost(150);
11280 format %{ "subl $dst, $src\t# int" %}
11281 ins_encode %{
11282 __ subl($dst$$Address, $src$$Register);
11283 %}
11284 ins_pipe(ialu_mem_reg);
11285 %}
11286
11287 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11288 %{
11289 predicate(!UseAPX);
11290 match(Set dst (SubL dst src));
11291 effect(KILL cr);
11292 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11293
11294 format %{ "subq $dst, $src\t# long" %}
11295 ins_encode %{
11296 __ subq($dst$$Register, $src$$Register);
11297 %}
11298 ins_pipe(ialu_reg_reg);
11299 %}
11300
11301 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11302 %{
11303 predicate(UseAPX);
11304 match(Set dst (SubL src1 src2));
11305 effect(KILL cr);
11306 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11307
11308 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11309 ins_encode %{
11310 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11311 %}
11312 ins_pipe(ialu_reg_reg);
11313 %}
11314
11315 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11316 %{
11317 predicate(UseAPX);
11318 match(Set dst (SubL src1 src2));
11319 effect(KILL cr);
11320 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11321
11322 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11323 ins_encode %{
11324 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11325 %}
11326 ins_pipe(ialu_reg_reg);
11327 %}
11328
11329 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11330 %{
11331 predicate(UseAPX);
11332 match(Set dst (SubL (LoadL src1) src2));
11333 effect(KILL cr);
11334 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11335
11336 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11337 ins_encode %{
11338 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11339 %}
11340 ins_pipe(ialu_reg_reg);
11341 %}
11342
11343 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11344 %{
11345 predicate(!UseAPX);
11346 match(Set dst (SubL dst (LoadL src)));
11347 effect(KILL cr);
11348 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11349
11350 ins_cost(150);
11351 format %{ "subq $dst, $src\t# long" %}
11352 ins_encode %{
11353 __ subq($dst$$Register, $src$$Address);
11354 %}
11355 ins_pipe(ialu_reg_mem);
11356 %}
11357
11358 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11359 %{
11360 predicate(UseAPX);
11361 match(Set dst (SubL src1 (LoadL src2)));
11362 effect(KILL cr);
11363 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11364
11365 ins_cost(150);
11366 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11367 ins_encode %{
11368 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11369 %}
11370 ins_pipe(ialu_reg_mem);
11371 %}
11372
11373 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11374 %{
11375 predicate(UseAPX);
11376 match(Set dst (SubL (LoadL src1) src2));
11377 effect(KILL cr);
11378 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11379
11380 ins_cost(150);
11381 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11382 ins_encode %{
11383 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11384 %}
11385 ins_pipe(ialu_reg_mem);
11386 %}
11387
11388 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11389 %{
11390 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11391 effect(KILL cr);
11392 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11393
11394 ins_cost(150);
11395 format %{ "subq $dst, $src\t# long" %}
11396 ins_encode %{
11397 __ subq($dst$$Address, $src$$Register);
11398 %}
11399 ins_pipe(ialu_mem_reg);
11400 %}
11401
11402 // Subtract from a pointer
11403 // XXX hmpf???
11404 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11405 %{
11406 match(Set dst (AddP dst (SubI zero src)));
11407 effect(KILL cr);
11408
11409 format %{ "subq $dst, $src\t# ptr - int" %}
11410 ins_encode %{
11411 __ subq($dst$$Register, $src$$Register);
11412 %}
11413 ins_pipe(ialu_reg_reg);
11414 %}
11415
11416 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11417 %{
11418 predicate(!UseAPX);
11419 match(Set dst (SubI zero dst));
11420 effect(KILL cr);
11421 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11422
11423 format %{ "negl $dst\t# int" %}
11424 ins_encode %{
11425 __ negl($dst$$Register);
11426 %}
11427 ins_pipe(ialu_reg);
11428 %}
11429
11430 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11431 %{
11432 predicate(UseAPX);
11433 match(Set dst (SubI zero src));
11434 effect(KILL cr);
11435 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11436
11437 format %{ "enegl $dst, $src\t# int ndd" %}
11438 ins_encode %{
11439 __ enegl($dst$$Register, $src$$Register, false);
11440 %}
11441 ins_pipe(ialu_reg);
11442 %}
11443
11444 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11445 %{
11446 predicate(!UseAPX);
11447 match(Set dst (NegI dst));
11448 effect(KILL cr);
11449 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11450
11451 format %{ "negl $dst\t# int" %}
11452 ins_encode %{
11453 __ negl($dst$$Register);
11454 %}
11455 ins_pipe(ialu_reg);
11456 %}
11457
11458 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11459 %{
11460 predicate(UseAPX);
11461 match(Set dst (NegI src));
11462 effect(KILL cr);
11463 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11464
11465 format %{ "enegl $dst, $src\t# int ndd" %}
11466 ins_encode %{
11467 __ enegl($dst$$Register, $src$$Register, false);
11468 %}
11469 ins_pipe(ialu_reg);
11470 %}
11471
11472 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11473 %{
11474 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11475 effect(KILL cr);
11476 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11477
11478 format %{ "negl $dst\t# int" %}
11479 ins_encode %{
11480 __ negl($dst$$Address);
11481 %}
11482 ins_pipe(ialu_reg);
11483 %}
11484
11485 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11486 %{
11487 predicate(!UseAPX);
11488 match(Set dst (SubL zero dst));
11489 effect(KILL cr);
11490 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11491
11492 format %{ "negq $dst\t# long" %}
11493 ins_encode %{
11494 __ negq($dst$$Register);
11495 %}
11496 ins_pipe(ialu_reg);
11497 %}
11498
11499 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11500 %{
11501 predicate(UseAPX);
11502 match(Set dst (SubL zero src));
11503 effect(KILL cr);
11504 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11505
11506 format %{ "enegq $dst, $src\t# long ndd" %}
11507 ins_encode %{
11508 __ enegq($dst$$Register, $src$$Register, false);
11509 %}
11510 ins_pipe(ialu_reg);
11511 %}
11512
11513 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11514 %{
11515 predicate(!UseAPX);
11516 match(Set dst (NegL dst));
11517 effect(KILL cr);
11518 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11519
11520 format %{ "negq $dst\t# long" %}
11521 ins_encode %{
11522 __ negq($dst$$Register);
11523 %}
11524 ins_pipe(ialu_reg);
11525 %}
11526
11527 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11528 %{
11529 predicate(UseAPX);
11530 match(Set dst (NegL src));
11531 effect(KILL cr);
11532 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11533
11534 format %{ "enegq $dst, $src\t# long ndd" %}
11535 ins_encode %{
11536 __ enegq($dst$$Register, $src$$Register, false);
11537 %}
11538 ins_pipe(ialu_reg);
11539 %}
11540
11541 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11542 %{
11543 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11544 effect(KILL cr);
11545 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11546
11547 format %{ "negq $dst\t# long" %}
11548 ins_encode %{
11549 __ negq($dst$$Address);
11550 %}
11551 ins_pipe(ialu_reg);
11552 %}
11553
11554 //----------Multiplication/Division Instructions-------------------------------
11555 // Integer Multiplication Instructions
11556 // Multiply Register
11557
11558 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11559 %{
11560 predicate(!UseAPX);
11561 match(Set dst (MulI dst src));
11562 effect(KILL cr);
11563
11564 ins_cost(300);
11565 format %{ "imull $dst, $src\t# int" %}
11566 ins_encode %{
11567 __ imull($dst$$Register, $src$$Register);
11568 %}
11569 ins_pipe(ialu_reg_reg_alu0);
11570 %}
11571
11572 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11573 %{
11574 predicate(UseAPX);
11575 match(Set dst (MulI src1 src2));
11576 effect(KILL cr);
11577 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11578
11579 ins_cost(300);
11580 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11581 ins_encode %{
11582 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11583 %}
11584 ins_pipe(ialu_reg_reg_alu0);
11585 %}
11586
11587 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11588 %{
11589 match(Set dst (MulI src imm));
11590 effect(KILL cr);
11591
11592 ins_cost(300);
11593 format %{ "imull $dst, $src, $imm\t# int" %}
11594 ins_encode %{
11595 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11596 %}
11597 ins_pipe(ialu_reg_reg_alu0);
11598 %}
11599
11600 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11601 %{
11602 predicate(!UseAPX);
11603 match(Set dst (MulI dst (LoadI src)));
11604 effect(KILL cr);
11605
11606 ins_cost(350);
11607 format %{ "imull $dst, $src\t# int" %}
11608 ins_encode %{
11609 __ imull($dst$$Register, $src$$Address);
11610 %}
11611 ins_pipe(ialu_reg_mem_alu0);
11612 %}
11613
11614 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11615 %{
11616 predicate(UseAPX);
11617 match(Set dst (MulI src1 (LoadI src2)));
11618 effect(KILL cr);
11619 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11620
11621 ins_cost(350);
11622 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11623 ins_encode %{
11624 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11625 %}
11626 ins_pipe(ialu_reg_mem_alu0);
11627 %}
11628
11629 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11630 %{
11631 match(Set dst (MulI (LoadI src) imm));
11632 effect(KILL cr);
11633
11634 ins_cost(300);
11635 format %{ "imull $dst, $src, $imm\t# int" %}
11636 ins_encode %{
11637 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11638 %}
11639 ins_pipe(ialu_reg_mem_alu0);
11640 %}
11641
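// MulAddS2I has no dedicated encoding here; the expand rule below decomposes it
// into two imull operations and an add, computing dst = dst*src1 + src2*src3
// (clobbering src2 in the process).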
11642 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11643 %{
11644 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11645 effect(KILL cr, KILL src2);
11646
11647 expand %{ mulI_rReg(dst, src1, cr);
11648 mulI_rReg(src2, src3, cr);
11649 addI_rReg(dst, src2, cr); %}
11650 %}
11651
11652 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11653 %{
11654 predicate(!UseAPX);
11655 match(Set dst (MulL dst src));
11656 effect(KILL cr);
11657
11658 ins_cost(300);
11659 format %{ "imulq $dst, $src\t# long" %}
11660 ins_encode %{
11661 __ imulq($dst$$Register, $src$$Register);
11662 %}
11663 ins_pipe(ialu_reg_reg_alu0);
11664 %}
11665
11666 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11667 %{
11668 predicate(UseAPX);
11669 match(Set dst (MulL src1 src2));
11670 effect(KILL cr);
11671 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11672
11673 ins_cost(300);
11674 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11675 ins_encode %{
11676 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11677 %}
11678 ins_pipe(ialu_reg_reg_alu0);
11679 %}
11680
11681 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11682 %{
11683 match(Set dst (MulL src imm));
11684 effect(KILL cr);
11685
11686 ins_cost(300);
11687 format %{ "imulq $dst, $src, $imm\t# long" %}
11688 ins_encode %{
11689 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11690 %}
11691 ins_pipe(ialu_reg_reg_alu0);
11692 %}
11693
11694 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11695 %{
11696 predicate(!UseAPX);
11697 match(Set dst (MulL dst (LoadL src)));
11698 effect(KILL cr);
11699
11700 ins_cost(350);
11701 format %{ "imulq $dst, $src\t# long" %}
11702 ins_encode %{
11703 __ imulq($dst$$Register, $src$$Address);
11704 %}
11705 ins_pipe(ialu_reg_mem_alu0);
11706 %}
11707
11708 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11709 %{
11710 predicate(UseAPX);
11711 match(Set dst (MulL src1 (LoadL src2)));
11712 effect(KILL cr);
11713 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11714
11715 ins_cost(350);
11716 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11717 ins_encode %{
11718 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11719 %}
11720 ins_pipe(ialu_reg_mem_alu0);
11721 %}
11722
11723 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11724 %{
11725 match(Set dst (MulL (LoadL src) imm));
11726 effect(KILL cr);
11727
11728 ins_cost(300);
11729 format %{ "imulq $dst, $src, $imm\t# long" %}
11730 ins_encode %{
11731 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11732 %}
11733 ins_pipe(ialu_reg_mem_alu0);
11734 %}
11735
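// MulHiL/UMulHiL use the one-operand imulq/mulq forms, which multiply RAX by the
// source and leave the 128-bit product in RDX:RAX; only the high half in RDX is
// kept as the result.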
11736 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11737 %{
11738 match(Set dst (MulHiL src rax));
11739 effect(USE_KILL rax, KILL cr);
11740
11741 ins_cost(300);
11742 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11743 ins_encode %{
11744 __ imulq($src$$Register);
11745 %}
11746 ins_pipe(ialu_reg_reg_alu0);
11747 %}
11748
11749 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11750 %{
11751 match(Set dst (UMulHiL src rax));
11752 effect(USE_KILL rax, KILL cr);
11753
11754 ins_cost(300);
11755 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11756 ins_encode %{
11757 __ mulq($src$$Register);
11758 %}
11759 ins_pipe(ialu_reg_reg_alu0);
11760 %}
11761
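// Signed divide/remainder guards against the one overflowing case: dividing
// MIN_VALUE by -1 would raise a hardware #DE fault, so the expansion checks for it
// up front and produces quotient MIN_VALUE / remainder 0 without executing idiv.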
11762 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11763 rFlagsReg cr)
11764 %{
11765 match(Set rax (DivI rax div));
11766 effect(KILL rdx, KILL cr);
11767
11768 ins_cost(30*100+10*100); // XXX
11769 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11770 "jne,s normal\n\t"
11771 "xorl rdx, rdx\n\t"
11772 "cmpl $div, -1\n\t"
11773 "je,s done\n"
11774 "normal: cdql\n\t"
11775 "idivl $div\n"
11776 "done:" %}
11777 ins_encode(cdql_enc(div));
11778 ins_pipe(ialu_reg_reg_alu0);
11779 %}
11780
11781 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11782 rFlagsReg cr)
11783 %{
11784 match(Set rax (DivL rax div));
11785 effect(KILL rdx, KILL cr);
11786
11787 ins_cost(30*100+10*100); // XXX
11788 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11789 "cmpq rax, rdx\n\t"
11790 "jne,s normal\n\t"
11791 "xorl rdx, rdx\n\t"
11792 "cmpq $div, -1\n\t"
11793 "je,s done\n"
11794 "normal: cdqq\n\t"
11795 "idivq $div\n"
11796 "done:" %}
11797 ins_encode(cdqq_enc(div));
11798 ins_pipe(ialu_reg_reg_alu0);
11799 %}
11800
11801 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11802 %{
11803 match(Set rax (UDivI rax div));
11804 effect(KILL rdx, KILL cr);
11805
11806 ins_cost(300);
11807 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11808 ins_encode %{
11809 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11810 %}
11811 ins_pipe(ialu_reg_reg_alu0);
11812 %}
11813
11814 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11815 %{
11816 match(Set rax (UDivL rax div));
11817 effect(KILL rdx, KILL cr);
11818
11819 ins_cost(300);
11820 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11821 ins_encode %{
11822 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11823 %}
11824 ins_pipe(ialu_reg_reg_alu0);
11825 %}
11826
11827 // Integer DIVMOD with Register, both quotient and mod results
11828 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11829 rFlagsReg cr)
11830 %{
11831 match(DivModI rax div);
11832 effect(KILL cr);
11833
11834 ins_cost(30*100+10*100); // XXX
11835 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11836 "jne,s normal\n\t"
11837 "xorl rdx, rdx\n\t"
11838 "cmpl $div, -1\n\t"
11839 "je,s done\n"
11840 "normal: cdql\n\t"
11841 "idivl $div\n"
11842 "done:" %}
11843 ins_encode(cdql_enc(div));
11844 ins_pipe(pipe_slow);
11845 %}
11846
11847 // Long DIVMOD with Register, both quotient and mod results
11848 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11849 rFlagsReg cr)
11850 %{
11851 match(DivModL rax div);
11852 effect(KILL cr);
11853
11854 ins_cost(30*100+10*100); // XXX
11855 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11856 "cmpq rax, rdx\n\t"
11857 "jne,s normal\n\t"
11858 "xorl rdx, rdx\n\t"
11859 "cmpq $div, -1\n\t"
11860 "je,s done\n"
11861 "normal: cdqq\n\t"
11862 "idivq $div\n"
11863 "done:" %}
11864 ins_encode(cdqq_enc(div));
11865 ins_pipe(pipe_slow);
11866 %}
11867
11868 // Unsigned integer DIVMOD with Register, both quotient and mod results
11869 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11870 no_rax_rdx_RegI div, rFlagsReg cr)
11871 %{
11872 match(UDivModI rax div);
11873 effect(TEMP tmp, KILL cr);
11874
11875 ins_cost(300);
11876 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11877 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11878 %}
11879 ins_encode %{
11880 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11881 %}
11882 ins_pipe(pipe_slow);
11883 %}
11884
11885 // Unsigned long DIVMOD with Register, both quotient and mod results
11886 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11887 no_rax_rdx_RegL div, rFlagsReg cr)
11888 %{
11889 match(UDivModL rax div);
11890 effect(TEMP tmp, KILL cr);
11891
11892 ins_cost(300);
11893 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11894 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11895 %}
11896 ins_encode %{
11897 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11898 %}
11899 ins_pipe(pipe_slow);
11900 %}
11901
11902 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11903 rFlagsReg cr)
11904 %{
11905 match(Set rdx (ModI rax div));
11906 effect(KILL rax, KILL cr);
11907
11908 ins_cost(300); // XXX
11909 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11910 "jne,s normal\n\t"
11911 "xorl rdx, rdx\n\t"
11912 "cmpl $div, -1\n\t"
11913 "je,s done\n"
11914 "normal: cdql\n\t"
11915 "idivl $div\n"
11916 "done:" %}
11917 ins_encode(cdql_enc(div));
11918 ins_pipe(ialu_reg_reg_alu0);
11919 %}
11920
11921 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11922 rFlagsReg cr)
11923 %{
11924 match(Set rdx (ModL rax div));
11925 effect(KILL rax, KILL cr);
11926
11927 ins_cost(300); // XXX
11928 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11929 "cmpq rax, rdx\n\t"
11930 "jne,s normal\n\t"
11931 "xorl rdx, rdx\n\t"
11932 "cmpq $div, -1\n\t"
11933 "je,s done\n"
11934 "normal: cdqq\n\t"
11935 "idivq $div\n"
11936 "done:" %}
11937 ins_encode(cdqq_enc(div));
11938 ins_pipe(ialu_reg_reg_alu0);
11939 %}
11940
11941 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11942 %{
11943 match(Set rdx (UModI rax div));
11944 effect(KILL rax, KILL cr);
11945
11946 ins_cost(300);
11947 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11948 ins_encode %{
11949 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11950 %}
11951 ins_pipe(ialu_reg_reg_alu0);
11952 %}
11953
11954 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11955 %{
11956 match(Set rdx (UModL rax div));
11957 effect(KILL rax, KILL cr);
11958
11959 ins_cost(300);
11960 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11961 ins_encode %{
11962 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11963 %}
11964 ins_pipe(ialu_reg_reg_alu0);
11965 %}
11966
11967 // Integer Shift Instructions
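// Without BMI2 the variable shift count must live in CL and the shift clobbers the
// flags; with BMI2 the shlx/sarx forms take the count in any register and leave the
// flags alone, which is why those rules neither require rcx nor kill cr.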
11968 // Shift Left by one, two, three
11969 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11970 %{
11971 predicate(!UseAPX);
11972 match(Set dst (LShiftI dst shift));
11973 effect(KILL cr);
11974
11975 format %{ "sall $dst, $shift" %}
11976 ins_encode %{
11977 __ sall($dst$$Register, $shift$$constant);
11978 %}
11979 ins_pipe(ialu_reg);
11980 %}
11981
11982 // Shift Left by one, two, three
11983 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11984 %{
11985 predicate(UseAPX);
11986 match(Set dst (LShiftI src shift));
11987 effect(KILL cr);
11988 flag(PD::Flag_ndd_demotable_opr1);
11989
11990 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11991 ins_encode %{
11992 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11993 %}
11994 ins_pipe(ialu_reg);
11995 %}
11996
11997 // Shift Left by 8-bit immediate
11998 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11999 %{
12000 predicate(!UseAPX);
12001 match(Set dst (LShiftI dst shift));
12002 effect(KILL cr);
12003
12004 format %{ "sall $dst, $shift" %}
12005 ins_encode %{
12006 __ sall($dst$$Register, $shift$$constant);
12007 %}
12008 ins_pipe(ialu_reg);
12009 %}
12010
12011 // Shift Left by 8-bit immediate
12012 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12013 %{
12014 predicate(UseAPX);
12015 match(Set dst (LShiftI src shift));
12016 effect(KILL cr);
12017 flag(PD::Flag_ndd_demotable_opr1);
12018
12019 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12020 ins_encode %{
12021 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12022 %}
12023 ins_pipe(ialu_reg);
12024 %}
12025
12026 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12027 %{
12028 predicate(UseAPX);
12029 match(Set dst (LShiftI (LoadI src) shift));
12030 effect(KILL cr);
12031
12032 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12033 ins_encode %{
12034 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
12035 %}
12036 ins_pipe(ialu_reg);
12037 %}
12038
12039 // Shift Left by 8-bit immediate
12040 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12041 %{
12042 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12043 effect(KILL cr);
12044
12045 format %{ "sall $dst, $shift" %}
12046 ins_encode %{
12047 __ sall($dst$$Address, $shift$$constant);
12048 %}
12049 ins_pipe(ialu_mem_imm);
12050 %}
12051
12052 // Shift Left by variable
12053 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12054 %{
12055 predicate(!VM_Version::supports_bmi2());
12056 match(Set dst (LShiftI dst shift));
12057 effect(KILL cr);
12058
12059 format %{ "sall $dst, $shift" %}
12060 ins_encode %{
12061 __ sall($dst$$Register);
12062 %}
12063 ins_pipe(ialu_reg_reg);
12064 %}
12065
12066 // Shift Left by variable
12067 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12068 %{
12069 predicate(!VM_Version::supports_bmi2());
12070 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12071 effect(KILL cr);
12072
12073 format %{ "sall $dst, $shift" %}
12074 ins_encode %{
12075 __ sall($dst$$Address);
12076 %}
12077 ins_pipe(ialu_mem_reg);
12078 %}
12079
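// BMI2 shlx/sarx/shrx take the shift count in an ordinary general register and do
// not modify the flags, so these forms need no rFlagsReg effect.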
12080 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12081 %{
12082 predicate(VM_Version::supports_bmi2());
12083 match(Set dst (LShiftI src shift));
12084
12085 format %{ "shlxl $dst, $src, $shift" %}
12086 ins_encode %{
12087 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12088 %}
12089 ins_pipe(ialu_reg_reg);
12090 %}
12091
12092 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12093 %{
12094 predicate(VM_Version::supports_bmi2());
12095 match(Set dst (LShiftI (LoadI src) shift));
12096 ins_cost(175);
12097 format %{ "shlxl $dst, $src, $shift" %}
12098 ins_encode %{
12099 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12100 %}
12101 ins_pipe(ialu_reg_mem);
12102 %}
12103
12104 // Arithmetic Shift Right by 8-bit immediate
12105 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12106 %{
12107 predicate(!UseAPX);
12108 match(Set dst (RShiftI dst shift));
12109 effect(KILL cr);
12110
12111 format %{ "sarl $dst, $shift" %}
12112 ins_encode %{
12113 __ sarl($dst$$Register, $shift$$constant);
12114 %}
12115 ins_pipe(ialu_mem_imm);
12116 %}
12117
12118 // Arithmetic Shift Right by 8-bit immediate
12119 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12120 %{
12121 predicate(UseAPX);
12122 match(Set dst (RShiftI src shift));
12123 effect(KILL cr);
12124 flag(PD::Flag_ndd_demotable_opr1);
12125
12126 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12127 ins_encode %{
12128 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12129 %}
12130 ins_pipe(ialu_mem_imm);
12131 %}
12132
12133 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12134 %{
12135 predicate(UseAPX);
12136 match(Set dst (RShiftI (LoadI src) shift));
12137 effect(KILL cr);
12138
12139 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12140 ins_encode %{
12141 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12142 %}
12143 ins_pipe(ialu_mem_imm);
12144 %}
12145
12146 // Arithmetic Shift Right by 8-bit immediate
12147 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12148 %{
12149 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12150 effect(KILL cr);
12151
12152 format %{ "sarl $dst, $shift" %}
12153 ins_encode %{
12154 __ sarl($dst$$Address, $shift$$constant);
12155 %}
12156 ins_pipe(ialu_mem_imm);
12157 %}
12158
12159 // Arithmetic Shift Right by variable
12160 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12161 %{
12162 predicate(!VM_Version::supports_bmi2());
12163 match(Set dst (RShiftI dst shift));
12164 effect(KILL cr);
12165
12166 format %{ "sarl $dst, $shift" %}
12167 ins_encode %{
12168 __ sarl($dst$$Register);
12169 %}
12170 ins_pipe(ialu_reg_reg);
12171 %}
12172
12173 // Arithmetic Shift Right by variable
12174 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12175 %{
12176 predicate(!VM_Version::supports_bmi2());
12177 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12178 effect(KILL cr);
12179
12180 format %{ "sarl $dst, $shift" %}
12181 ins_encode %{
12182 __ sarl($dst$$Address);
12183 %}
12184 ins_pipe(ialu_mem_reg);
12185 %}
12186
12187 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12188 %{
12189 predicate(VM_Version::supports_bmi2());
12190 match(Set dst (RShiftI src shift));
12191
12192 format %{ "sarxl $dst, $src, $shift" %}
12193 ins_encode %{
12194 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12195 %}
12196 ins_pipe(ialu_reg_reg);
12197 %}
12198
12199 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12200 %{
12201 predicate(VM_Version::supports_bmi2());
12202 match(Set dst (RShiftI (LoadI src) shift));
12203 ins_cost(175);
12204 format %{ "sarxl $dst, $src, $shift" %}
12205 ins_encode %{
12206 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12207 %}
12208 ins_pipe(ialu_reg_mem);
12209 %}
12210
12211 // Logical Shift Right by 8-bit immediate
12212 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12213 %{
12214 predicate(!UseAPX);
12215 match(Set dst (URShiftI dst shift));
12216 effect(KILL cr);
12217
12218 format %{ "shrl $dst, $shift" %}
12219 ins_encode %{
12220 __ shrl($dst$$Register, $shift$$constant);
12221 %}
12222 ins_pipe(ialu_reg);
12223 %}
12224
12225 // Logical Shift Right by 8-bit immediate
12226 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12227 %{
12228 predicate(UseAPX);
12229 match(Set dst (URShiftI src shift));
12230 effect(KILL cr);
12231 flag(PD::Flag_ndd_demotable_opr1);
12232
12233 format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12234 ins_encode %{
12235 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12236 %}
12237 ins_pipe(ialu_reg);
12238 %}
12239
12240 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12241 %{
12242 predicate(UseAPX);
12243 match(Set dst (URShiftI (LoadI src) shift));
12244 effect(KILL cr);
12245
12246 format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12247 ins_encode %{
12248 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12249 %}
12250 ins_pipe(ialu_reg);
12251 %}
12252
12253 // Logical Shift Right by 8-bit immediate
12254 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12255 %{
12256 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12257 effect(KILL cr);
12258
12259 format %{ "shrl $dst, $shift" %}
12260 ins_encode %{
12261 __ shrl($dst$$Address, $shift$$constant);
12262 %}
12263 ins_pipe(ialu_mem_imm);
12264 %}
12265
12266 // Logical Shift Right by variable
12267 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12268 %{
12269 predicate(!VM_Version::supports_bmi2());
12270 match(Set dst (URShiftI dst shift));
12271 effect(KILL cr);
12272
12273 format %{ "shrl $dst, $shift" %}
12274 ins_encode %{
12275 __ shrl($dst$$Register);
12276 %}
12277 ins_pipe(ialu_reg_reg);
12278 %}
12279
12280 // Logical Shift Right by variable
12281 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12282 %{
12283 predicate(!VM_Version::supports_bmi2());
12284 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12285 effect(KILL cr);
12286
12287 format %{ "shrl $dst, $shift" %}
12288 ins_encode %{
12289 __ shrl($dst$$Address);
12290 %}
12291 ins_pipe(ialu_mem_reg);
12292 %}
12293
12294 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12295 %{
12296 predicate(VM_Version::supports_bmi2());
12297 match(Set dst (URShiftI src shift));
12298
12299 format %{ "shrxl $dst, $src, $shift" %}
12300 ins_encode %{
12301 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12302 %}
12303 ins_pipe(ialu_reg_reg);
12304 %}
12305
12306 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12307 %{
12308 predicate(VM_Version::supports_bmi2());
12309 match(Set dst (URShiftI (LoadI src) shift));
12310 ins_cost(175);
12311 format %{ "shrxl $dst, $src, $shift" %}
12312 ins_encode %{
12313 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12314 %}
12315 ins_pipe(ialu_reg_mem);
12316 %}
12317
12318 // Long Shift Instructions
12319 // Shift Left by one, two, three
12320 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12321 %{
12322 predicate(!UseAPX);
12323 match(Set dst (LShiftL dst shift));
12324 effect(KILL cr);
12325
12326 format %{ "salq $dst, $shift" %}
12327 ins_encode %{
12328 __ salq($dst$$Register, $shift$$constant);
12329 %}
12330 ins_pipe(ialu_reg);
12331 %}
12332
12333 // Shift Left by one, two, three
12334 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12335 %{
12336 predicate(UseAPX);
12337 match(Set dst (LShiftL src shift));
12338 effect(KILL cr);
12339 flag(PD::Flag_ndd_demotable_opr1);
12340
12341 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12342 ins_encode %{
12343 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12344 %}
12345 ins_pipe(ialu_reg);
12346 %}
12347
12348 // Shift Left by 8-bit immediate
12349 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12350 %{
12351 predicate(!UseAPX);
12352 match(Set dst (LShiftL dst shift));
12353 effect(KILL cr);
12354
12355 format %{ "salq $dst, $shift" %}
12356 ins_encode %{
12357 __ salq($dst$$Register, $shift$$constant);
12358 %}
12359 ins_pipe(ialu_reg);
12360 %}
12361
12362 // Shift Left by 8-bit immediate
12363 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12364 %{
12365 predicate(UseAPX);
12366 match(Set dst (LShiftL src shift));
12367 effect(KILL cr);
12368 flag(PD::Flag_ndd_demotable_opr1);
12369
12370 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12371 ins_encode %{
12372 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12373 %}
12374 ins_pipe(ialu_reg);
12375 %}
12376
12377 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12378 %{
12379 predicate(UseAPX);
12380 match(Set dst (LShiftL (LoadL src) shift));
12381 effect(KILL cr);
12382
12383 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12384 ins_encode %{
12385 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12386 %}
12387 ins_pipe(ialu_reg);
12388 %}
12389
12390 // Shift Left by 8-bit immediate
12391 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12392 %{
12393 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12394 effect(KILL cr);
12395
12396 format %{ "salq $dst, $shift" %}
12397 ins_encode %{
12398 __ salq($dst$$Address, $shift$$constant);
12399 %}
12400 ins_pipe(ialu_mem_imm);
12401 %}
12402
12403 // Shift Left by variable
12404 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12405 %{
12406 predicate(!VM_Version::supports_bmi2());
12407 match(Set dst (LShiftL dst shift));
12408 effect(KILL cr);
12409
12410 format %{ "salq $dst, $shift" %}
12411 ins_encode %{
12412 __ salq($dst$$Register);
12413 %}
12414 ins_pipe(ialu_reg_reg);
12415 %}
12416
12417 // Shift Left by variable
12418 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12419 %{
12420 predicate(!VM_Version::supports_bmi2());
12421 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12422 effect(KILL cr);
12423
12424 format %{ "salq $dst, $shift" %}
12425 ins_encode %{
12426 __ salq($dst$$Address);
12427 %}
12428 ins_pipe(ialu_mem_reg);
12429 %}
12430
12431 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12432 %{
12433 predicate(VM_Version::supports_bmi2());
12434 match(Set dst (LShiftL src shift));
12435
12436 format %{ "shlxq $dst, $src, $shift" %}
12437 ins_encode %{
12438 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12439 %}
12440 ins_pipe(ialu_reg_reg);
12441 %}
12442
12443 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12444 %{
12445 predicate(VM_Version::supports_bmi2());
12446 match(Set dst (LShiftL (LoadL src) shift));
12447 ins_cost(175);
12448 format %{ "shlxq $dst, $src, $shift" %}
12449 ins_encode %{
12450 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12451 %}
12452 ins_pipe(ialu_reg_mem);
12453 %}
12454
12455 // Arithmetic Shift Right by 8-bit immediate
12456 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12457 %{
12458 predicate(!UseAPX);
12459 match(Set dst (RShiftL dst shift));
12460 effect(KILL cr);
12461
12462 format %{ "sarq $dst, $shift" %}
12463 ins_encode %{
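// Only the low 6 bits of the count are meaningful for a 64-bit shift, so the
// immediate is masked to the range 0..63.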
12464 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12465 %}
12466 ins_pipe(ialu_mem_imm);
12467 %}
12468
12469 // Arithmetic Shift Right by 8-bit immediate
12470 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12471 %{
12472 predicate(UseAPX);
12473 match(Set dst (RShiftL src shift));
12474 effect(KILL cr);
12475 flag(PD::Flag_ndd_demotable_opr1);
12476
12477 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12478 ins_encode %{
12479 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12480 %}
12481 ins_pipe(ialu_mem_imm);
12482 %}
12483
12484 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12485 %{
12486 predicate(UseAPX);
12487 match(Set dst (RShiftL (LoadL src) shift));
12488 effect(KILL cr);
12489
12490 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12491 ins_encode %{
12492 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12493 %}
12494 ins_pipe(ialu_mem_imm);
12495 %}
12496
12497 // Arithmetic Shift Right by 8-bit immediate
12498 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12499 %{
12500 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12501 effect(KILL cr);
12502
12503 format %{ "sarq $dst, $shift" %}
12504 ins_encode %{
12505 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12506 %}
12507 ins_pipe(ialu_mem_imm);
12508 %}
12509
12510 // Arithmetic Shift Right by variable
12511 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12512 %{
12513 predicate(!VM_Version::supports_bmi2());
12514 match(Set dst (RShiftL dst shift));
12515 effect(KILL cr);
12516
12517 format %{ "sarq $dst, $shift" %}
12518 ins_encode %{
12519 __ sarq($dst$$Register);
12520 %}
12521 ins_pipe(ialu_reg_reg);
12522 %}
12523
12524 // Arithmetic Shift Right by variable
12525 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12526 %{
12527 predicate(!VM_Version::supports_bmi2());
12528 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12529 effect(KILL cr);
12530
12531 format %{ "sarq $dst, $shift" %}
12532 ins_encode %{
12533 __ sarq($dst$$Address);
12534 %}
12535 ins_pipe(ialu_mem_reg);
12536 %}
12537
12538 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12539 %{
12540 predicate(VM_Version::supports_bmi2());
12541 match(Set dst (RShiftL src shift));
12542
12543 format %{ "sarxq $dst, $src, $shift" %}
12544 ins_encode %{
12545 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12546 %}
12547 ins_pipe(ialu_reg_reg);
12548 %}
12549
12550 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12551 %{
12552 predicate(VM_Version::supports_bmi2());
12553 match(Set dst (RShiftL (LoadL src) shift));
12554 ins_cost(175);
12555 format %{ "sarxq $dst, $src, $shift" %}
12556 ins_encode %{
12557 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12558 %}
12559 ins_pipe(ialu_reg_mem);
12560 %}
12561
12562 // Logical Shift Right by 8-bit immediate
12563 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12564 %{
12565 predicate(!UseAPX);
12566 match(Set dst (URShiftL dst shift));
12567 effect(KILL cr);
12568
12569 format %{ "shrq $dst, $shift" %}
12570 ins_encode %{
12571 __ shrq($dst$$Register, $shift$$constant);
12572 %}
12573 ins_pipe(ialu_reg);
12574 %}
12575
12576 // Logical Shift Right by 8-bit immediate
12577 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12578 %{
12579 predicate(UseAPX);
12580 match(Set dst (URShiftL src shift));
12581 effect(KILL cr);
12582 flag(PD::Flag_ndd_demotable_opr1);
12583
12584 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12585 ins_encode %{
12586 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12587 %}
12588 ins_pipe(ialu_reg);
12589 %}
12590
12591 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12592 %{
12593 predicate(UseAPX);
12594 match(Set dst (URShiftL (LoadL src) shift));
12595 effect(KILL cr);
12596
12597 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12598 ins_encode %{
12599 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12600 %}
12601 ins_pipe(ialu_reg);
12602 %}
12603
12604 // Logical Shift Right by 8-bit immediate
12605 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12606 %{
12607 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12608 effect(KILL cr);
12609
12610 format %{ "shrq $dst, $shift" %}
12611 ins_encode %{
12612 __ shrq($dst$$Address, $shift$$constant);
12613 %}
12614 ins_pipe(ialu_mem_imm);
12615 %}
12616
12617 // Logical Shift Right by variable
12618 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12619 %{
12620 predicate(!VM_Version::supports_bmi2());
12621 match(Set dst (URShiftL dst shift));
12622 effect(KILL cr);
12623
12624 format %{ "shrq $dst, $shift" %}
12625 ins_encode %{
12626 __ shrq($dst$$Register);
12627 %}
12628 ins_pipe(ialu_reg_reg);
12629 %}
12630
12631 // Logical Shift Right by variable
12632 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12633 %{
12634 predicate(!VM_Version::supports_bmi2());
12635 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12636 effect(KILL cr);
12637
12638 format %{ "shrq $dst, $shift" %}
12639 ins_encode %{
12640 __ shrq($dst$$Address);
12641 %}
12642 ins_pipe(ialu_mem_reg);
12643 %}
12644
12645 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12646 %{
12647 predicate(VM_Version::supports_bmi2());
12648 match(Set dst (URShiftL src shift));
12649
12650 format %{ "shrxq $dst, $src, $shift" %}
12651 ins_encode %{
12652 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12653 %}
12654 ins_pipe(ialu_reg_reg);
12655 %}
12656
12657 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12658 %{
12659 predicate(VM_Version::supports_bmi2());
12660 match(Set dst (URShiftL (LoadL src) shift));
12661 ins_cost(175);
12662 format %{ "shrxq $dst, $src, $shift" %}
12663 ins_encode %{
12664 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12665 %}
12666 ins_pipe(ialu_reg_mem);
12667 %}
12668
12669 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12670 // This idiom is used by the compiler for the i2b bytecode.
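// For example, the Java cast "(byte) x" becomes (x << 24) >> 24, which this rule
// collapses into a single sign-extending byte move.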
12671 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12672 %{
12673 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12674
12675 format %{ "movsbl $dst, $src\t# i2b" %}
12676 ins_encode %{
12677 __ movsbl($dst$$Register, $src$$Register);
12678 %}
12679 ins_pipe(ialu_reg_reg);
12680 %}
12681
12682 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12683 // This idiom is used by the compiler for the i2s bytecode.
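// For example, the Java cast "(short) x" becomes (x << 16) >> 16, which this rule
// collapses into a single sign-extending 16-bit move.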
12684 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12685 %{
12686 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12687
12688 format %{ "movswl $dst, $src\t# i2s" %}
12689 ins_encode %{
12690 __ movswl($dst$$Register, $src$$Register);
12691 %}
12692 ins_pipe(ialu_reg_reg);
12693 %}
12694
12695 // ROL/ROR instructions
12696
12697 // Rotate left by constant.
12698 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12699 %{
12700 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12701 match(Set dst (RotateLeft dst shift));
12702 effect(KILL cr);
12703 format %{ "roll $dst, $shift" %}
12704 ins_encode %{
12705 __ roll($dst$$Register, $shift$$constant);
12706 %}
12707 ins_pipe(ialu_reg);
12708 %}
12709
12710 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12711 %{
12712 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12713 match(Set dst (RotateLeft src shift));
12714 format %{ "rolxl $dst, $src, $shift" %}
12715 ins_encode %{
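// A rotate left by k equals a rotate right by (32 - k) & 31; rorxl is a
// non-destructive BMI2 rotate that leaves the flags untouched.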
12716 int shift = 32 - ($shift$$constant & 31);
12717 __ rorxl($dst$$Register, $src$$Register, shift);
12718 %}
12719 ins_pipe(ialu_reg_reg);
12720 %}
12721
12722 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12723 %{
12724 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12725 match(Set dst (RotateLeft (LoadI src) shift));
12726 ins_cost(175);
12727 format %{ "rolxl $dst, $src, $shift" %}
12728 ins_encode %{
12729 int shift = 32 - ($shift$$constant & 31);
12730 __ rorxl($dst$$Register, $src$$Address, shift);
12731 %}
12732 ins_pipe(ialu_reg_mem);
12733 %}
12734
12735 // Rotate Left by variable
12736 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12737 %{
12738 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12739 match(Set dst (RotateLeft dst shift));
12740 effect(KILL cr);
12741 format %{ "roll $dst, $shift" %}
12742 ins_encode %{
12743 __ roll($dst$$Register);
12744 %}
12745 ins_pipe(ialu_reg_reg);
12746 %}
12747
12748 // Rotate Left by variable
12749 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12750 %{
12751 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12752 match(Set dst (RotateLeft src shift));
12753 effect(KILL cr);
12754 flag(PD::Flag_ndd_demotable_opr1);
12755
12756 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12757 ins_encode %{
12758 __ eroll($dst$$Register, $src$$Register, false);
12759 %}
12760 ins_pipe(ialu_reg_reg);
12761 %}
12762
12763 // Rotate Right by constant.
12764 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12765 %{
12766 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12767 match(Set dst (RotateRight dst shift));
12768 effect(KILL cr);
12769 format %{ "rorl $dst, $shift" %}
12770 ins_encode %{
12771 __ rorl($dst$$Register, $shift$$constant);
12772 %}
12773 ins_pipe(ialu_reg);
12774 %}
12775
12776 // Rotate Right by constant.
12777 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12778 %{
12779 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12780 match(Set dst (RotateRight src shift));
12781 format %{ "rorxl $dst, $src, $shift" %}
12782 ins_encode %{
12783 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12784 %}
12785 ins_pipe(ialu_reg_reg);
12786 %}
12787
12788 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12789 %{
12790 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12791 match(Set dst (RotateRight (LoadI src) shift));
12792 ins_cost(175);
12793 format %{ "rorxl $dst, $src, $shift" %}
12794 ins_encode %{
12795 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12796 %}
12797 ins_pipe(ialu_reg_mem);
12798 %}
12799
12800 // Rotate Right by variable
12801 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12802 %{
12803 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12804 match(Set dst (RotateRight dst shift));
12805 effect(KILL cr);
12806 format %{ "rorl $dst, $shift" %}
12807 ins_encode %{
12808 __ rorl($dst$$Register);
12809 %}
12810 ins_pipe(ialu_reg_reg);
12811 %}
12812
12813 // Rotate Right by variable
12814 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12815 %{
12816 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12817 match(Set dst (RotateRight src shift));
12818 effect(KILL cr);
12819 flag(PD::Flag_ndd_demotable_opr1);
12820
12821 format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12822 ins_encode %{
12823 __ erorl($dst$$Register, $src$$Register, false);
12824 %}
12825 ins_pipe(ialu_reg_reg);
12826 %}
12827
12828 // Rotate Left by constant.
12829 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12830 %{
12831 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12832 match(Set dst (RotateLeft dst shift));
12833 effect(KILL cr);
12834 format %{ "rolq $dst, $shift" %}
12835 ins_encode %{
12836 __ rolq($dst$$Register, $shift$$constant);
12837 %}
12838 ins_pipe(ialu_reg);
12839 %}
12840
12841 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12842 %{
12843 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12844 match(Set dst (RotateLeft src shift));
12845 format %{ "rolxq $dst, $src, $shift" %}
12846 ins_encode %{
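// A rotate left by k equals a rotate right by (64 - k) & 63; rorxq leaves the
// flags untouched.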
12847 int shift = 64 - ($shift$$constant & 63);
12848 __ rorxq($dst$$Register, $src$$Register, shift);
12849 %}
12850 ins_pipe(ialu_reg_reg);
12851 %}
12852
12853 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12854 %{
12855 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12856 match(Set dst (RotateLeft (LoadL src) shift));
12857 ins_cost(175);
12858 format %{ "rolxq $dst, $src, $shift" %}
12859 ins_encode %{
12860 int shift = 64 - ($shift$$constant & 63);
12861 __ rorxq($dst$$Register, $src$$Address, shift);
12862 %}
12863 ins_pipe(ialu_reg_mem);
12864 %}
12865
12866 // Rotate Left by variable
12867 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12868 %{
12869 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12870 match(Set dst (RotateLeft dst shift));
12871 effect(KILL cr);
12872
12873 format %{ "rolq $dst, $shift" %}
12874 ins_encode %{
12875 __ rolq($dst$$Register);
12876 %}
12877 ins_pipe(ialu_reg_reg);
12878 %}
12879
12880 // Rotate Left by variable
12881 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12882 %{
12883 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12884 match(Set dst (RotateLeft src shift));
12885 effect(KILL cr);
12886 flag(PD::Flag_ndd_demotable_opr1);
12887
12888 format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12889 ins_encode %{
12890 __ erolq($dst$$Register, $src$$Register, false);
12891 %}
12892 ins_pipe(ialu_reg_reg);
12893 %}
12894
12895 // Rotate Right by constant.
12896 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12897 %{
12898 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12899 match(Set dst (RotateRight dst shift));
12900 effect(KILL cr);
12901 format %{ "rorq $dst, $shift" %}
12902 ins_encode %{
12903 __ rorq($dst$$Register, $shift$$constant);
12904 %}
12905 ins_pipe(ialu_reg);
12906 %}
12907
12908 // Rotate Right by constant
12909 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12910 %{
12911 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12912 match(Set dst (RotateRight src shift));
12913 format %{ "rorxq $dst, $src, $shift" %}
12914 ins_encode %{
12915 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12916 %}
12917 ins_pipe(ialu_reg_reg);
12918 %}
12919
12920 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12921 %{
12922 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12923 match(Set dst (RotateRight (LoadL src) shift));
12924 ins_cost(175);
12925 format %{ "rorxq $dst, $src, $shift" %}
12926 ins_encode %{
12927 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12928 %}
12929 ins_pipe(ialu_reg_mem);
12930 %}
12931
12932 // Rotate Right by variable
12933 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12934 %{
12935 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12936 match(Set dst (RotateRight dst shift));
12937 effect(KILL cr);
12938 format %{ "rorq $dst, $shift" %}
12939 ins_encode %{
12940 __ rorq($dst$$Register);
12941 %}
12942 ins_pipe(ialu_reg_reg);
12943 %}
12944
12945 // Rotate Right by variable
12946 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12947 %{
12948 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12949 match(Set dst (RotateRight src shift));
12950 effect(KILL cr);
12951 flag(PD::Flag_ndd_demotable_opr1);
12952
12953 format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12954 ins_encode %{
12955 __ erorq($dst$$Register, $src$$Register, false);
12956 %}
12957 ins_pipe(ialu_reg_reg);
12958 %}
12959
12960 //----------------------------- CompressBits/ExpandBits ------------------------
12961
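// BMI2 pext gathers the bits of src selected by mask into the low-order bits of the
// destination; pdep is the inverse, scattering the low-order bits of src into the bit
// positions selected by mask.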
12962 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12963 predicate(n->bottom_type()->isa_long());
12964 match(Set dst (CompressBits src mask));
12965 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12966 ins_encode %{
12967 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12968 %}
12969 ins_pipe( pipe_slow );
12970 %}
12971
12972 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12973 predicate(n->bottom_type()->isa_long());
12974 match(Set dst (ExpandBits src mask));
12975 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12976 ins_encode %{
12977 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12978 %}
12979 ins_pipe( pipe_slow );
12980 %}
12981
12982 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12983 predicate(n->bottom_type()->isa_long());
12984 match(Set dst (CompressBits src (LoadL mask)));
12985 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12986 ins_encode %{
12987 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12988 %}
12989 ins_pipe( pipe_slow );
12990 %}
12991
12992 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12993 predicate(n->bottom_type()->isa_long());
12994 match(Set dst (ExpandBits src (LoadL mask)));
12995 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12996 ins_encode %{
12997 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12998 %}
12999 ins_pipe( pipe_slow );
13000 %}
13001
13002
13003 // Logical Instructions
13004
13005 // Integer Logical Instructions
13006
13007 // And Instructions
13008 // And Register with Register
13009 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13010 %{
13011 predicate(!UseAPX);
13012 match(Set dst (AndI dst src));
13013 effect(KILL cr);
13014 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13015
13016 format %{ "andl $dst, $src\t# int" %}
13017 ins_encode %{
13018 __ andl($dst$$Register, $src$$Register);
13019 %}
13020 ins_pipe(ialu_reg_reg);
13021 %}
13022
13023 // And Register with Register using New Data Destination (NDD)
13024 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13025 %{
13026 predicate(UseAPX);
13027 match(Set dst (AndI src1 src2));
13028 effect(KILL cr);
13029 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13030
13031 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13032 ins_encode %{
13033 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
13034
13035 %}
13036 ins_pipe(ialu_reg_reg);
13037 %}
13038
13039 // And Register with Immediate 255
13040 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
13041 %{
13042 match(Set dst (AndI src mask));
13043
13044 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
13045 ins_encode %{
13046 __ movzbl($dst$$Register, $src$$Register);
13047 %}
13048 ins_pipe(ialu_reg);
13049 %}
13050
13051 // And Register with Immediate 255 and promote to long
13052 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13053 %{
13054 match(Set dst (ConvI2L (AndI src mask)));
13055
13056 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
13057 ins_encode %{
13058 __ movzbl($dst$$Register, $src$$Register);
13059 %}
13060 ins_pipe(ialu_reg);
13061 %}
13062
13063 // And Register with Immediate 65535
13064 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13065 %{
13066 match(Set dst (AndI src mask));
13067
13068 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13069 ins_encode %{
13070 __ movzwl($dst$$Register, $src$$Register);
13071 %}
13072 ins_pipe(ialu_reg);
13073 %}
13074
13075 // And Register with Immediate 65535 and promote to long
13076 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13077 %{
13078 match(Set dst (ConvI2L (AndI src mask)));
13079
13080 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13081 ins_encode %{
13082 __ movzwl($dst$$Register, $src$$Register);
13083 %}
13084 ins_pipe(ialu_reg);
13085 %}
13086
13087 // Can skip int2long conversions after AND with small bitmask
13088 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13089 %{
13090 predicate(VM_Version::supports_bmi2());
13091 ins_cost(125);
13092 effect(TEMP tmp, KILL cr);
13093 match(Set dst (ConvI2L (AndI src mask)));
13094 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13095 ins_encode %{
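// mask has the form 2^k - 1 (immI_Pow2M1), so k = exact_log2(mask + 1). bzhiq
// clears all bits of src at positions >= k, performing the AND and the zero
// extension to 64 bits in a single step.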
13096 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13097 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13098 %}
13099 ins_pipe(ialu_reg_reg);
13100 %}
13101
13102 // And Register with Immediate
13103 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13104 %{
13105 predicate(!UseAPX);
13106 match(Set dst (AndI dst src));
13107 effect(KILL cr);
13108 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13109
13110 format %{ "andl $dst, $src\t# int" %}
13111 ins_encode %{
13112 __ andl($dst$$Register, $src$$constant);
13113 %}
13114 ins_pipe(ialu_reg);
13115 %}
13116
13117 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13118 %{
13119 predicate(UseAPX);
13120 match(Set dst (AndI src1 src2));
13121 effect(KILL cr);
13122 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13123
13124 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13125 ins_encode %{
13126 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13127 %}
13128 ins_pipe(ialu_reg);
13129 %}
13130
13131 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13132 %{
13133 predicate(UseAPX);
13134 match(Set dst (AndI (LoadI src1) src2));
13135 effect(KILL cr);
13136 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13137
13138 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13139 ins_encode %{
13140 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13141 %}
13142 ins_pipe(ialu_reg);
13143 %}
13144
13145 // And Register with Memory
13146 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13147 %{
13148 predicate(!UseAPX);
13149 match(Set dst (AndI dst (LoadI src)));
13150 effect(KILL cr);
13151 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13152
13153 ins_cost(150);
13154 format %{ "andl $dst, $src\t# int" %}
13155 ins_encode %{
13156 __ andl($dst$$Register, $src$$Address);
13157 %}
13158 ins_pipe(ialu_reg_mem);
13159 %}
13160
13161 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13162 %{
13163 predicate(UseAPX);
13164 match(Set dst (AndI src1 (LoadI src2)));
13165 effect(KILL cr);
13166 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13167
13168 ins_cost(150);
13169 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13170 ins_encode %{
13171 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13172 %}
13173 ins_pipe(ialu_reg_mem);
13174 %}
13175
13176 // And Memory with Register
13177 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13178 %{
13179 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13180 effect(KILL cr);
13181 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13182
13183 ins_cost(150);
13184 format %{ "andb $dst, $src\t# byte" %}
13185 ins_encode %{
13186 __ andb($dst$$Address, $src$$Register);
13187 %}
13188 ins_pipe(ialu_mem_reg);
13189 %}
13190
13191 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13192 %{
13193 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13194 effect(KILL cr);
13195 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13196
13197 ins_cost(150);
13198 format %{ "andl $dst, $src\t# int" %}
13199 ins_encode %{
13200 __ andl($dst$$Address, $src$$Register);
13201 %}
13202 ins_pipe(ialu_mem_reg);
13203 %}
13204
13205 // And Memory with Immediate
13206 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13207 %{
13208 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13209 effect(KILL cr);
13210 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13211
13212 ins_cost(125);
13213 format %{ "andl $dst, $src\t# int" %}
13214 ins_encode %{
13215 __ andl($dst$$Address, $src$$constant);
13216 %}
13217 ins_pipe(ialu_mem_imm);
13218 %}
13219
13220 // BMI1 instructions
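// These rules map the classic bit-manipulation idioms onto single BMI1 instructions:
//   ~x & y      -> andn   (matched as (x ^ -1) & y)
//   x & -x      -> blsi   (isolate lowest set bit)
//   x ^ (x - 1) -> blsmsk (mask up to and including lowest set bit)
//   x & (x - 1) -> blsr   (clear lowest set bit)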
13221 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13222 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13223 predicate(UseBMI1Instructions);
13224 effect(KILL cr);
13225 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13226
13227 ins_cost(125);
13228 format %{ "andnl $dst, $src1, $src2" %}
13229
13230 ins_encode %{
13231 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13232 %}
13233 ins_pipe(ialu_reg_mem);
13234 %}
13235
13236 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13237 match(Set dst (AndI (XorI src1 minus_1) src2));
13238 predicate(UseBMI1Instructions);
13239 effect(KILL cr);
13240 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13241
13242 format %{ "andnl $dst, $src1, $src2" %}
13243
13244 ins_encode %{
13245 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13246 %}
13247 ins_pipe(ialu_reg);
13248 %}
13249
13250 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13251 match(Set dst (AndI (SubI imm_zero src) src));
13252 predicate(UseBMI1Instructions);
13253 effect(KILL cr);
13254 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13255
13256 format %{ "blsil $dst, $src" %}
13257
13258 ins_encode %{
13259 __ blsil($dst$$Register, $src$$Register);
13260 %}
13261 ins_pipe(ialu_reg);
13262 %}
13263
13264 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13265 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13266 predicate(UseBMI1Instructions);
13267 effect(KILL cr);
13268 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13269
13270 ins_cost(125);
13271 format %{ "blsil $dst, $src" %}
13272
13273 ins_encode %{
13274 __ blsil($dst$$Register, $src$$Address);
13275 %}
13276 ins_pipe(ialu_reg_mem);
13277 %}
13278
13279 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13280 %{
13281 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13282 predicate(UseBMI1Instructions);
13283 effect(KILL cr);
13284 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13285
13286 ins_cost(125);
13287 format %{ "blsmskl $dst, $src" %}
13288
13289 ins_encode %{
13290 __ blsmskl($dst$$Register, $src$$Address);
13291 %}
13292 ins_pipe(ialu_reg_mem);
13293 %}
13294
13295 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13296 %{
13297 match(Set dst (XorI (AddI src minus_1) src));
13298 predicate(UseBMI1Instructions);
13299 effect(KILL cr);
13300 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13301
13302 format %{ "blsmskl $dst, $src" %}
13303
13304 ins_encode %{
13305 __ blsmskl($dst$$Register, $src$$Register);
13306 %}
13307
13308 ins_pipe(ialu_reg);
13309 %}
13310
13311 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13312 %{
13313 match(Set dst (AndI (AddI src minus_1) src) );
13314 predicate(UseBMI1Instructions);
13315 effect(KILL cr);
13316 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13317
13318 format %{ "blsrl $dst, $src" %}
13319
13320 ins_encode %{
13321 __ blsrl($dst$$Register, $src$$Register);
13322 %}
13323
13324 ins_pipe(ialu_reg_mem);
13325 %}
13326
13327 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13328 %{
13329 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13330 predicate(UseBMI1Instructions);
13331 effect(KILL cr);
13332 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13333
13334 ins_cost(125);
13335 format %{ "blsrl $dst, $src" %}
13336
13337 ins_encode %{
13338 __ blsrl($dst$$Register, $src$$Address);
13339 %}
13340
13341 ins_pipe(ialu_reg);
13342 %}
13343
13344 // Or Instructions
13345 // Or Register with Register
13346 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13347 %{
13348 predicate(!UseAPX);
13349 match(Set dst (OrI dst src));
13350 effect(KILL cr);
13351 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13352
13353 format %{ "orl $dst, $src\t# int" %}
13354 ins_encode %{
13355 __ orl($dst$$Register, $src$$Register);
13356 %}
13357 ins_pipe(ialu_reg_reg);
13358 %}
13359
13360 // Or Register with Register using New Data Destination (NDD)
13361 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13362 %{
13363 predicate(UseAPX);
13364 match(Set dst (OrI src1 src2));
13365 effect(KILL cr);
13366 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13367
13368 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13369 ins_encode %{
13370 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13371 %}
13372 ins_pipe(ialu_reg_reg);
13373 %}
13374
13375 // Or Register with Immediate
13376 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13377 %{
13378 predicate(!UseAPX);
13379 match(Set dst (OrI dst src));
13380 effect(KILL cr);
13381 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13382
13383 format %{ "orl $dst, $src\t# int" %}
13384 ins_encode %{
13385 __ orl($dst$$Register, $src$$constant);
13386 %}
13387 ins_pipe(ialu_reg);
13388 %}
13389
13390 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13391 %{
13392 predicate(UseAPX);
13393 match(Set dst (OrI src1 src2));
13394 effect(KILL cr);
13395 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13396
13397 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13398 ins_encode %{
13399 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13400 %}
13401 ins_pipe(ialu_reg);
13402 %}
13403
13404 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13405 %{
13406 predicate(UseAPX);
13407 match(Set dst (OrI src1 src2));
13408 effect(KILL cr);
13409 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13410
13411 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13412 ins_encode %{
13413 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13414 %}
13415 ins_pipe(ialu_reg);
13416 %}
13417
13418 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13419 %{
13420 predicate(UseAPX);
13421 match(Set dst (OrI (LoadI src1) src2));
13422 effect(KILL cr);
13423 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13424
13425 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13426 ins_encode %{
13427 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13428 %}
13429 ins_pipe(ialu_reg);
13430 %}
13431
13432 // Or Register with Memory
13433 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13434 %{
13435 predicate(!UseAPX);
13436 match(Set dst (OrI dst (LoadI src)));
13437 effect(KILL cr);
13438 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13439
13440 ins_cost(150);
13441 format %{ "orl $dst, $src\t# int" %}
13442 ins_encode %{
13443 __ orl($dst$$Register, $src$$Address);
13444 %}
13445 ins_pipe(ialu_reg_mem);
13446 %}
13447
13448 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13449 %{
13450 predicate(UseAPX);
13451 match(Set dst (OrI src1 (LoadI src2)));
13452 effect(KILL cr);
13453 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13454
13455 ins_cost(150);
13456 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13457 ins_encode %{
13458 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13459 %}
13460 ins_pipe(ialu_reg_mem);
13461 %}
13462
13463 // Or Memory with Register
13464 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13465 %{
13466 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13467 effect(KILL cr);
13468 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13469
13470 ins_cost(150);
13471 format %{ "orb $dst, $src\t# byte" %}
13472 ins_encode %{
13473 __ orb($dst$$Address, $src$$Register);
13474 %}
13475 ins_pipe(ialu_mem_reg);
13476 %}
13477
13478 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13479 %{
13480 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13481 effect(KILL cr);
13482 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13483
13484 ins_cost(150);
13485 format %{ "orl $dst, $src\t# int" %}
13486 ins_encode %{
13487 __ orl($dst$$Address, $src$$Register);
13488 %}
13489 ins_pipe(ialu_mem_reg);
13490 %}
13491
13492 // Or Memory with Immediate
13493 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13494 %{
13495 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13496 effect(KILL cr);
13497 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13498
13499 ins_cost(125);
13500 format %{ "orl $dst, $src\t# int" %}
13501 ins_encode %{
13502 __ orl($dst$$Address, $src$$constant);
13503 %}
13504 ins_pipe(ialu_mem_imm);
13505 %}
13506
13507 // Xor Instructions
13508 // Xor Register with Register
13509 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13510 %{
13511 predicate(!UseAPX);
13512 match(Set dst (XorI dst src));
13513 effect(KILL cr);
13514 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13515
13516 format %{ "xorl $dst, $src\t# int" %}
13517 ins_encode %{
13518 __ xorl($dst$$Register, $src$$Register);
13519 %}
13520 ins_pipe(ialu_reg_reg);
13521 %}
13522
13523 // Xor Register with Register using New Data Destination (NDD)
13524 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13525 %{
13526 predicate(UseAPX);
13527 match(Set dst (XorI src1 src2));
13528 effect(KILL cr);
13529 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13530
13531 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13532 ins_encode %{
13533 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13534 %}
13535 ins_pipe(ialu_reg_reg);
13536 %}
13537
13538 // Xor Register with Immediate -1
13539 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13540 %{
13541 predicate(!UseAPX);
13542 match(Set dst (XorI dst imm));
13543
13544 format %{ "notl $dst" %}
13545 ins_encode %{
13546 __ notl($dst$$Register);
13547 %}
13548 ins_pipe(ialu_reg);
13549 %}
13550
13551 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13552 %{
13553 match(Set dst (XorI src imm));
13554 predicate(UseAPX);
13555 flag(PD::Flag_ndd_demotable_opr1);
13556
13557 format %{ "enotl $dst, $src" %}
13558 ins_encode %{
13559 __ enotl($dst$$Register, $src$$Register);
13560 %}
13561 ins_pipe(ialu_reg);
13562 %}
13563
13564 // Xor Register with Immediate
13565 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13566 %{
13567 // Strict predicate check to make selection of xorI_rReg_im1 cost-agnostic when immI src is -1.
13568 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13569 match(Set dst (XorI dst src));
13570 effect(KILL cr);
13571 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13572
13573 format %{ "xorl $dst, $src\t# int" %}
13574 ins_encode %{
13575 __ xorl($dst$$Register, $src$$constant);
13576 %}
13577 ins_pipe(ialu_reg);
13578 %}
13579
13580 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13581 %{
13582 // Strict predicate check to make selection of xorI_rReg_im1_ndd cost-agnostic when immI src2 is -1.
13583 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13584 match(Set dst (XorI src1 src2));
13585 effect(KILL cr);
13586 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13587
13588 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13589 ins_encode %{
13590 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13591 %}
13592 ins_pipe(ialu_reg);
13593 %}
13594
13595 // Xor Memory with Immediate
13596 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13597 %{
13598 predicate(UseAPX);
13599 match(Set dst (XorI (LoadI src1) src2));
13600 effect(KILL cr);
13601 ins_cost(150);
13602 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13603
13604 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13605 ins_encode %{
13606 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13607 %}
13608 ins_pipe(ialu_reg);
13609 %}
13610
13611 // Xor Register with Memory
13612 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13613 %{
13614 predicate(!UseAPX);
13615 match(Set dst (XorI dst (LoadI src)));
13616 effect(KILL cr);
13617 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13618
13619 ins_cost(150);
13620 format %{ "xorl $dst, $src\t# int" %}
13621 ins_encode %{
13622 __ xorl($dst$$Register, $src$$Address);
13623 %}
13624 ins_pipe(ialu_reg_mem);
13625 %}
13626
13627 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13628 %{
13629 predicate(UseAPX);
13630 match(Set dst (XorI src1 (LoadI src2)));
13631 effect(KILL cr);
13632 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13633
13634 ins_cost(150);
13635 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13636 ins_encode %{
13637 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13638 %}
13639 ins_pipe(ialu_reg_mem);
13640 %}
13641
13642 // Xor Memory with Register
13643 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13644 %{
13645 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13646 effect(KILL cr);
13647 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13648
13649 ins_cost(150);
13650 format %{ "xorb $dst, $src\t# byte" %}
13651 ins_encode %{
13652 __ xorb($dst$$Address, $src$$Register);
13653 %}
13654 ins_pipe(ialu_mem_reg);
13655 %}
13656
13657 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13658 %{
13659 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13660 effect(KILL cr);
13661 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13662
13663 ins_cost(150);
13664 format %{ "xorl $dst, $src\t# int" %}
13665 ins_encode %{
13666 __ xorl($dst$$Address, $src$$Register);
13667 %}
13668 ins_pipe(ialu_mem_reg);
13669 %}
13670
13671 // Xor Memory with Immediate
13672 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13673 %{
13674 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13675 effect(KILL cr);
13676 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13677
13678 ins_cost(125);
13679 format %{ "xorl $dst, $src\t# int" %}
13680 ins_encode %{
13681 __ xorl($dst$$Address, $src$$constant);
13682 %}
13683 ins_pipe(ialu_mem_imm);
13684 %}
13685
13686
13687 // Long Logical Instructions
13688
13689 // And Instructions
13690 // And Register with Register
13691 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13692 %{
13693 predicate(!UseAPX);
13694 match(Set dst (AndL dst src));
13695 effect(KILL cr);
13696 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13697
13698 format %{ "andq $dst, $src\t# long" %}
13699 ins_encode %{
13700 __ andq($dst$$Register, $src$$Register);
13701 %}
13702 ins_pipe(ialu_reg_reg);
13703 %}
13704
13705 // And Register with Register using New Data Destination (NDD)
13706 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13707 %{
13708 predicate(UseAPX);
13709 match(Set dst (AndL src1 src2));
13710 effect(KILL cr);
13711 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13712
13713 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13714 ins_encode %{
13715 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13716
13717 %}
13718 ins_pipe(ialu_reg_reg);
13719 %}
13720
13721 // And Register with Immediate 255
13722 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13723 %{
13724 match(Set dst (AndL src mask));
13725
13726 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13727 ins_encode %{
13728 // movzbl zeroes out the upper 32 bits and does not need REX.W
13729 __ movzbl($dst$$Register, $src$$Register);
13730 %}
13731 ins_pipe(ialu_reg);
13732 %}
13733
13734 // And Register with Immediate 65535
13735 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13736 %{
13737 match(Set dst (AndL src mask));
13738
13739 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13740 ins_encode %{
13741 // movzwl zeroes out the upper 32 bits and does not need REX.W
13742 __ movzwl($dst$$Register, $src$$Register);
13743 %}
13744 ins_pipe(ialu_reg);
13745 %}
13746
13747 // And Register with Immediate
13748 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13749 %{
13750 predicate(!UseAPX);
13751 match(Set dst (AndL dst src));
13752 effect(KILL cr);
13753 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13754
13755 format %{ "andq $dst, $src\t# long" %}
13756 ins_encode %{
13757 __ andq($dst$$Register, $src$$constant);
13758 %}
13759 ins_pipe(ialu_reg);
13760 %}
13761
13762 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13763 %{
13764 predicate(UseAPX);
13765 match(Set dst (AndL src1 src2));
13766 effect(KILL cr);
13767 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13768
13769 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13770 ins_encode %{
13771 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13772 %}
13773 ins_pipe(ialu_reg);
13774 %}
13775
13776 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13777 %{
13778 predicate(UseAPX);
13779 match(Set dst (AndL (LoadL src1) src2));
13780 effect(KILL cr);
13781 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13782
13783 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13784 ins_encode %{
13785 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13786 %}
13787 ins_pipe(ialu_reg);
13788 %}
13789
13790 // And Register with Memory
13791 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13792 %{
13793 predicate(!UseAPX);
13794 match(Set dst (AndL dst (LoadL src)));
13795 effect(KILL cr);
13796 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13797
13798 ins_cost(150);
13799 format %{ "andq $dst, $src\t# long" %}
13800 ins_encode %{
13801 __ andq($dst$$Register, $src$$Address);
13802 %}
13803 ins_pipe(ialu_reg_mem);
13804 %}
13805
13806 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13807 %{
13808 predicate(UseAPX);
13809 match(Set dst (AndL src1 (LoadL src2)));
13810 effect(KILL cr);
13811 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13812
13813 ins_cost(150);
13814 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13815 ins_encode %{
13816 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13817 %}
13818 ins_pipe(ialu_reg_mem);
13819 %}
13820
13821 // And Memory with Register
13822 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13823 %{
13824 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13825 effect(KILL cr);
13826 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13827
13828 ins_cost(150);
13829 format %{ "andq $dst, $src\t# long" %}
13830 ins_encode %{
13831 __ andq($dst$$Address, $src$$Register);
13832 %}
13833 ins_pipe(ialu_mem_reg);
13834 %}
13835
13836 // And Memory with Immediate
13837 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13838 %{
13839 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13840 effect(KILL cr);
13841 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13842
13843 ins_cost(125);
13844 format %{ "andq $dst, $src\t# long" %}
13845 ins_encode %{
13846 __ andq($dst$$Address, $src$$constant);
13847 %}
13848 ins_pipe(ialu_mem_imm);
13849 %}
13850
13851 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13852 %{
  // con should be a pure 64-bit immediate whose complement not(con) is a power of 2,
  // because the plain AND-with-immediate form already handles 8/32-bit values well enough.
13855 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13856
13857 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13858 effect(KILL cr);
13859
13860 ins_cost(125);
13861 format %{ "btrq $dst, log2(not($con))\t# long" %}
13862 ins_encode %{
13863 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13864 %}
13865 ins_pipe(ialu_mem_imm);
13866 %}
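
// Illustrative note (assumption, not from the original source): this rule is
// aimed at constant bit-clearing stores where the cleared bit lies above bit 30,
// e.g. roughly
//   flags[i] &= ~(1L << 40);   // emitted as btrq [mem], 40
// Smaller masks stay on the ordinary and-with-immediate path.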
13867
13868 // BMI1 instructions
13869 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13870 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13871 predicate(UseBMI1Instructions);
13872 effect(KILL cr);
13873 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13874
13875 ins_cost(125);
13876 format %{ "andnq $dst, $src1, $src2" %}
13877
13878 ins_encode %{
13879 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13880 %}
13881 ins_pipe(ialu_reg_mem);
13882 %}
13883
13884 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13885 match(Set dst (AndL (XorL src1 minus_1) src2));
13886 predicate(UseBMI1Instructions);
13887 effect(KILL cr);
13888 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13889
13890 format %{ "andnq $dst, $src1, $src2" %}
13891
13892 ins_encode %{
13893 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13894 %}
13895 ins_pipe(ialu_reg_mem);
13896 %}
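
// Illustrative note (not from the original source): the two ANDN rules above
// cover the "and-not" idiom, i.e. roughly
//   long r = ~a & b;           // emitted as andnq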
13897
13898 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13899 match(Set dst (AndL (SubL imm_zero src) src));
13900 predicate(UseBMI1Instructions);
13901 effect(KILL cr);
13902 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13903
13904 format %{ "blsiq $dst, $src" %}
13905
13906 ins_encode %{
13907 __ blsiq($dst$$Register, $src$$Register);
13908 %}
13909 ins_pipe(ialu_reg);
13910 %}
13911
13912 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13913 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13914 predicate(UseBMI1Instructions);
13915 effect(KILL cr);
13916 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13917
13918 ins_cost(125);
13919 format %{ "blsiq $dst, $src" %}
13920
13921 ins_encode %{
13922 __ blsiq($dst$$Register, $src$$Address);
13923 %}
13924 ins_pipe(ialu_reg_mem);
13925 %}
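
// Illustrative note (not from the original source): BLSI isolates the lowest
// set bit, matching the idiom
//   long lowest = x & -x;      // emitted as blsiq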
13926
13927 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13928 %{
13929 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13930 predicate(UseBMI1Instructions);
13931 effect(KILL cr);
13932 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13933
13934 ins_cost(125);
13935 format %{ "blsmskq $dst, $src" %}
13936
13937 ins_encode %{
13938 __ blsmskq($dst$$Register, $src$$Address);
13939 %}
13940 ins_pipe(ialu_reg_mem);
13941 %}
13942
13943 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13944 %{
13945 match(Set dst (XorL (AddL src minus_1) src));
13946 predicate(UseBMI1Instructions);
13947 effect(KILL cr);
13948 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13949
13950 format %{ "blsmskq $dst, $src" %}
13951
13952 ins_encode %{
13953 __ blsmskq($dst$$Register, $src$$Register);
13954 %}
13955
13956 ins_pipe(ialu_reg);
13957 %}
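
// Illustrative note (not from the original source): BLSMSK builds a mask up to
// and including the lowest set bit, matching
//   long mask = x ^ (x - 1);   // emitted as blsmskq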
13958
13959 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13960 %{
13961 match(Set dst (AndL (AddL src minus_1) src) );
13962 predicate(UseBMI1Instructions);
13963 effect(KILL cr);
13964 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13965
13966 format %{ "blsrq $dst, $src" %}
13967
13968 ins_encode %{
13969 __ blsrq($dst$$Register, $src$$Register);
13970 %}
13971
13972 ins_pipe(ialu_reg);
13973 %}
13974
13975 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13976 %{
13977 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13978 predicate(UseBMI1Instructions);
13979 effect(KILL cr);
13980 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13981
13982 ins_cost(125);
13983 format %{ "blsrq $dst, $src" %}
13984
13985 ins_encode %{
13986 __ blsrq($dst$$Register, $src$$Address);
13987 %}
13988
13989 ins_pipe(ialu_reg);
13990 %}
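
// Illustrative note (not from the original source): BLSR clears the lowest set
// bit, matching the common bit-iteration idiom
//   long rest = x & (x - 1);   // emitted as blsrq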
13991
13992 // Or Instructions
13993 // Or Register with Register
13994 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13995 %{
13996 predicate(!UseAPX);
13997 match(Set dst (OrL dst src));
13998 effect(KILL cr);
13999 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14000
14001 format %{ "orq $dst, $src\t# long" %}
14002 ins_encode %{
14003 __ orq($dst$$Register, $src$$Register);
14004 %}
14005 ins_pipe(ialu_reg_reg);
14006 %}
14007
14008 // Or Register with Register using New Data Destination (NDD)
14009 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14010 %{
14011 predicate(UseAPX);
14012 match(Set dst (OrL src1 src2));
14013 effect(KILL cr);
14014 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14015
14016 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14017 ins_encode %{
14018 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14020 %}
14021 ins_pipe(ialu_reg_reg);
14022 %}
14023
14024 // Use any_RegP to match R15 (TLS register) without spilling.
14025 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
14026 match(Set dst (OrL dst (CastP2X src)));
14027 effect(KILL cr);
14028 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14029
14030 format %{ "orq $dst, $src\t# long" %}
14031 ins_encode %{
14032 __ orq($dst$$Register, $src$$Register);
14033 %}
14034 ins_pipe(ialu_reg_reg);
14035 %}
14036
14037 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
14038 match(Set dst (OrL src1 (CastP2X src2)));
14039 effect(KILL cr);
14040 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14041
14042 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14043 ins_encode %{
14044 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14045 %}
14046 ins_pipe(ialu_reg_reg);
14047 %}
14048
14049 // Or Register with Immediate
14050 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14051 %{
14052 predicate(!UseAPX);
14053 match(Set dst (OrL dst src));
14054 effect(KILL cr);
14055 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14056
14057 format %{ "orq $dst, $src\t# long" %}
14058 ins_encode %{
14059 __ orq($dst$$Register, $src$$constant);
14060 %}
14061 ins_pipe(ialu_reg);
14062 %}
14063
14064 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14065 %{
14066 predicate(UseAPX);
14067 match(Set dst (OrL src1 src2));
14068 effect(KILL cr);
14069 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14070
14071 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14072 ins_encode %{
14073 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14074 %}
14075 ins_pipe(ialu_reg);
14076 %}
14077
14078 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14079 %{
14080 predicate(UseAPX);
14081 match(Set dst (OrL src1 src2));
14082 effect(KILL cr);
14083 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14084
14085 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14086 ins_encode %{
14087 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14088 %}
14089 ins_pipe(ialu_reg);
14090 %}
14091
// Or Memory Operand with Immediate into Register (NDD)
14093 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14094 %{
14095 predicate(UseAPX);
14096 match(Set dst (OrL (LoadL src1) src2));
14097 effect(KILL cr);
14098 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14099
14100 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14101 ins_encode %{
14102 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14103 %}
14104 ins_pipe(ialu_reg);
14105 %}
14106
14107 // Or Register with Memory
14108 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14109 %{
14110 predicate(!UseAPX);
14111 match(Set dst (OrL dst (LoadL src)));
14112 effect(KILL cr);
14113 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14114
14115 ins_cost(150);
14116 format %{ "orq $dst, $src\t# long" %}
14117 ins_encode %{
14118 __ orq($dst$$Register, $src$$Address);
14119 %}
14120 ins_pipe(ialu_reg_mem);
14121 %}
14122
14123 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14124 %{
14125 predicate(UseAPX);
14126 match(Set dst (OrL src1 (LoadL src2)));
14127 effect(KILL cr);
14128 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14129
14130 ins_cost(150);
14131 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14132 ins_encode %{
14133 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14134 %}
14135 ins_pipe(ialu_reg_mem);
14136 %}
14137
14138 // Or Memory with Register
14139 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14140 %{
14141 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14142 effect(KILL cr);
14143 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14144
14145 ins_cost(150);
14146 format %{ "orq $dst, $src\t# long" %}
14147 ins_encode %{
14148 __ orq($dst$$Address, $src$$Register);
14149 %}
14150 ins_pipe(ialu_mem_reg);
14151 %}
14152
14153 // Or Memory with Immediate
14154 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14155 %{
14156 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14157 effect(KILL cr);
14158 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14159
14160 ins_cost(125);
14161 format %{ "orq $dst, $src\t# long" %}
14162 ins_encode %{
14163 __ orq($dst$$Address, $src$$constant);
14164 %}
14165 ins_pipe(ialu_mem_imm);
14166 %}
14167
14168 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14169 %{
  // con should be a pure 64-bit power-of-2 immediate,
  // because the plain OR-with-immediate form already handles 8/32-bit values well enough.
14172 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14173
14174 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14175 effect(KILL cr);
14176
14177 ins_cost(125);
14178 format %{ "btsq $dst, log2($con)\t# long" %}
14179 ins_encode %{
14180 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14181 %}
14182 ins_pipe(ialu_mem_imm);
14183 %}
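
// Illustrative note (assumption, not from the original source): the mirror of
// btrL_mem_imm above; constant bit-setting stores whose bit index is above 31,
// e.g. roughly
//   flags[i] |= (1L << 40);    // emitted as btsq [mem], 40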
14184
14185 // Xor Instructions
14186 // Xor Register with Register
14187 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14188 %{
14189 predicate(!UseAPX);
14190 match(Set dst (XorL dst src));
14191 effect(KILL cr);
14192 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14193
14194 format %{ "xorq $dst, $src\t# long" %}
14195 ins_encode %{
14196 __ xorq($dst$$Register, $src$$Register);
14197 %}
14198 ins_pipe(ialu_reg_reg);
14199 %}
14200
14201 // Xor Register with Register using New Data Destination (NDD)
14202 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14203 %{
14204 predicate(UseAPX);
14205 match(Set dst (XorL src1 src2));
14206 effect(KILL cr);
14207 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14208
14209 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14210 ins_encode %{
14211 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14212 %}
14213 ins_pipe(ialu_reg_reg);
14214 %}
14215
14216 // Xor Register with Immediate -1
14217 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14218 %{
14219 predicate(!UseAPX);
14220 match(Set dst (XorL dst imm));
14221
14222 format %{ "notq $dst" %}
14223 ins_encode %{
14224 __ notq($dst$$Register);
14225 %}
14226 ins_pipe(ialu_reg);
14227 %}
14228
14229 instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
14230 %{
14231 predicate(UseAPX);
14232 match(Set dst (XorL src imm));
14233 flag(PD::Flag_ndd_demotable_opr1);
14234
14235 format %{ "enotq $dst, $src" %}
14236 ins_encode %{
14237 __ enotq($dst$$Register, $src$$Register);
14238 %}
14239 ins_pipe(ialu_reg);
14240 %}
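
// Illustrative note (not from the original source): javac lowers the unary
// complement to an xor with -1, so the two rules above cover
//   long r = ~x;               // i.e. x ^ -1L, emitted as notq / enotq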
14241
14242 // Xor Register with Immediate
14243 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14244 %{
  // Strict predicate check: exclude -1 so that xorL_rReg_im1 is selected regardless of cost when immL32 src is -1.
14246 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14247 match(Set dst (XorL dst src));
14248 effect(KILL cr);
14249 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14250
14251 format %{ "xorq $dst, $src\t# long" %}
14252 ins_encode %{
14253 __ xorq($dst$$Register, $src$$constant);
14254 %}
14255 ins_pipe(ialu_reg);
14256 %}
14257
14258 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14259 %{
  // Strict predicate check: exclude -1 so that xorL_rReg_im1_ndd is selected regardless of cost when immL32 src2 is -1.
14261 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14262 match(Set dst (XorL src1 src2));
14263 effect(KILL cr);
14264 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14265
14266 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14267 ins_encode %{
14268 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14269 %}
14270 ins_pipe(ialu_reg);
14271 %}
14272
// Xor Memory Operand with Immediate into Register (NDD)
14274 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14275 %{
14276 predicate(UseAPX);
14277 match(Set dst (XorL (LoadL src1) src2));
14278 effect(KILL cr);
14279 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14280 ins_cost(150);
14281
14282 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14283 ins_encode %{
14284 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14285 %}
14286 ins_pipe(ialu_reg);
14287 %}
14288
14289 // Xor Register with Memory
14290 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14291 %{
14292 predicate(!UseAPX);
14293 match(Set dst (XorL dst (LoadL src)));
14294 effect(KILL cr);
14295 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14296
14297 ins_cost(150);
14298 format %{ "xorq $dst, $src\t# long" %}
14299 ins_encode %{
14300 __ xorq($dst$$Register, $src$$Address);
14301 %}
14302 ins_pipe(ialu_reg_mem);
14303 %}
14304
14305 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14306 %{
14307 predicate(UseAPX);
14308 match(Set dst (XorL src1 (LoadL src2)));
14309 effect(KILL cr);
14310 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14311
14312 ins_cost(150);
14313 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14314 ins_encode %{
14315 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14316 %}
14317 ins_pipe(ialu_reg_mem);
14318 %}
14319
14320 // Xor Memory with Register
14321 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14322 %{
14323 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14324 effect(KILL cr);
14325 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14326
14327 ins_cost(150);
14328 format %{ "xorq $dst, $src\t# long" %}
14329 ins_encode %{
14330 __ xorq($dst$$Address, $src$$Register);
14331 %}
14332 ins_pipe(ialu_mem_reg);
14333 %}
14334
14335 // Xor Memory with Immediate
14336 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14337 %{
14338 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14339 effect(KILL cr);
14340 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14341
14342 ins_cost(125);
14343 format %{ "xorq $dst, $src\t# long" %}
14344 ins_encode %{
14345 __ xorq($dst$$Address, $src$$constant);
14346 %}
14347 ins_pipe(ialu_mem_imm);
14348 %}
14349
14350 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14351 %{
14352 match(Set dst (CmpLTMask p q));
14353 effect(KILL cr);
14354
14355 ins_cost(400);
14356 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
14358 "negl $dst" %}
14359 ins_encode %{
14360 __ cmpl($p$$Register, $q$$Register);
14361 __ setcc(Assembler::less, $dst$$Register);
14362 __ negl($dst$$Register);
14363 %}
14364 ins_pipe(pipe_slow);
14365 %}
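
// Illustrative note (not from the original source): CmpLTMask produces an
// all-ones or all-zero mask from a signed comparison, roughly
//   int mask = (p < q) ? -1 : 0;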
14366
14367 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14368 %{
14369 match(Set dst (CmpLTMask dst zero));
14370 effect(KILL cr);
14371
14372 ins_cost(100);
14373 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14374 ins_encode %{
14375 __ sarl($dst$$Register, 31);
14376 %}
14377 ins_pipe(ialu_reg);
14378 %}
14379
14380 /* Better to save a register than avoid a branch */
14381 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14382 %{
14383 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14384 effect(KILL cr);
14385 ins_cost(300);
14386 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14387 "jge done\n\t"
14388 "addl $p,$y\n"
14389 "done: " %}
14390 ins_encode %{
14391 Register Rp = $p$$Register;
14392 Register Rq = $q$$Register;
14393 Register Ry = $y$$Register;
14394 Label done;
14395 __ subl(Rp, Rq);
14396 __ jccb(Assembler::greaterEqual, done);
14397 __ addl(Rp, Ry);
14398 __ bind(done);
14399 %}
14400 ins_pipe(pipe_cmplt);
14401 %}
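
// Illustrative note (not from the original source): the sequence above
// implements a masked conditional add, roughly
//   p -= q; if (p < 0) p += y;
// as used, for example, in modular-reduction style code.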
14402
14403 /* Better to save a register than avoid a branch */
14404 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14405 %{
14406 match(Set y (AndI (CmpLTMask p q) y));
14407 effect(KILL cr);
14408
14409 ins_cost(300);
14410
14411 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14412 "jlt done\n\t"
14413 "xorl $y, $y\n"
14414 "done: " %}
14415 ins_encode %{
14416 Register Rp = $p$$Register;
14417 Register Rq = $q$$Register;
14418 Register Ry = $y$$Register;
14419 Label done;
14420 __ cmpl(Rp, Rq);
14421 __ jccb(Assembler::less, done);
14422 __ xorl(Ry, Ry);
14423 __ bind(done);
14424 %}
14425 ins_pipe(pipe_cmplt);
14426 %}
14427
14428
14429 //---------- FP Instructions------------------------------------------------
14430
14431 // Really expensive, avoid
14432 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14433 %{
14434 match(Set cr (CmpF src1 src2));
14435
14436 ins_cost(500);
14437 format %{ "ucomiss $src1, $src2\n\t"
14438 "jnp,s exit\n\t"
14439 "pushfq\t# saw NaN, set CF\n\t"
14440 "andq [rsp], #0xffffff2b\n\t"
14441 "popfq\n"
14442 "exit:" %}
14443 ins_encode %{
14444 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14445 emit_cmpfp_fixup(masm);
14446 %}
14447 ins_pipe(pipe_slow);
14448 %}
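
// Illustrative note (assumption, not from the original source): ucomiss reports
// unordered (NaN) inputs as ZF=PF=CF=1; the fixup above rewrites the flags in
// that case so the result reads unambiguously as "less than" before the
// unsigned-flag consumers see it.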
14449
14450 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14451 match(Set cr (CmpF src1 src2));
14452
14453 ins_cost(100);
14454 format %{ "ucomiss $src1, $src2" %}
14455 ins_encode %{
14456 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14457 %}
14458 ins_pipe(pipe_slow);
14459 %}
14460
14461 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14462 match(Set cr (CmpF src1 src2));
14463
14464 ins_cost(100);
14465 format %{ "vucomxss $src1, $src2" %}
14466 ins_encode %{
14467 __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14468 %}
14469 ins_pipe(pipe_slow);
14470 %}
14471
14472 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14473 match(Set cr (CmpF src1 (LoadF src2)));
14474
14475 ins_cost(100);
14476 format %{ "ucomiss $src1, $src2" %}
14477 ins_encode %{
14478 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14479 %}
14480 ins_pipe(pipe_slow);
14481 %}
14482
14483 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14484 match(Set cr (CmpF src1 (LoadF src2)));
14485
14486 ins_cost(100);
14487 format %{ "vucomxss $src1, $src2" %}
14488 ins_encode %{
14489 __ vucomxss($src1$$XMMRegister, $src2$$Address);
14490 %}
14491 ins_pipe(pipe_slow);
14492 %}
14493
14494 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14495 match(Set cr (CmpF src con));
14496
14497 ins_cost(100);
14498 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14499 ins_encode %{
14500 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14501 %}
14502 ins_pipe(pipe_slow);
14503 %}
14504
14505 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14506 match(Set cr (CmpF src con));
14507
14508 ins_cost(100);
14509 format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14510 ins_encode %{
14511 __ vucomxss($src$$XMMRegister, $constantaddress($con));
14512 %}
14513 ins_pipe(pipe_slow);
14514 %}
14515
14516 // Really expensive, avoid
14517 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14518 %{
14519 match(Set cr (CmpD src1 src2));
14520
14521 ins_cost(500);
14522 format %{ "ucomisd $src1, $src2\n\t"
14523 "jnp,s exit\n\t"
14524 "pushfq\t# saw NaN, set CF\n\t"
14525 "andq [rsp], #0xffffff2b\n\t"
14526 "popfq\n"
14527 "exit:" %}
14528 ins_encode %{
14529 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14530 emit_cmpfp_fixup(masm);
14531 %}
14532 ins_pipe(pipe_slow);
14533 %}
14534
14535 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14536 match(Set cr (CmpD src1 src2));
14537
14538 ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
14540 ins_encode %{
14541 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14542 %}
14543 ins_pipe(pipe_slow);
14544 %}
14545
14546 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14547 match(Set cr (CmpD src1 src2));
14548
14549 ins_cost(100);
  format %{ "vucomxsd $src1, $src2" %}
14551 ins_encode %{
14552 __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14553 %}
14554 ins_pipe(pipe_slow);
14555 %}
14556
14557 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14558 match(Set cr (CmpD src1 (LoadD src2)));
14559
14560 ins_cost(100);
14561 format %{ "ucomisd $src1, $src2" %}
14562 ins_encode %{
14563 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14564 %}
14565 ins_pipe(pipe_slow);
14566 %}
14567
14568 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14569 match(Set cr (CmpD src1 (LoadD src2)));
14570
14571 ins_cost(100);
14572 format %{ "vucomxsd $src1, $src2" %}
14573 ins_encode %{
14574 __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14575 %}
14576 ins_pipe(pipe_slow);
14577 %}
14578
14579 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14580 match(Set cr (CmpD src con));
14581 ins_cost(100);
14582 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14583 ins_encode %{
14584 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14585 %}
14586 ins_pipe(pipe_slow);
14587 %}
14588
14589 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14590 match(Set cr (CmpD src con));
14591
14592 ins_cost(100);
14593 format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14594 ins_encode %{
14595 __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14596 %}
14597 ins_pipe(pipe_slow);
14598 %}
14599
14600 // Compare into -1,0,1
14601 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14602 %{
14603 match(Set dst (CmpF3 src1 src2));
14604 effect(KILL cr);
14605
14606 ins_cost(275);
14607 format %{ "ucomiss $src1, $src2\n\t"
14608 "movl $dst, #-1\n\t"
14609 "jp,s done\n\t"
14610 "jb,s done\n\t"
14611 "setne $dst\n\t"
14612 "movzbl $dst, $dst\n"
14613 "done:" %}
14614 ins_encode %{
14615 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14616 emit_cmpfp3(masm, $dst$$Register);
14617 %}
14618 ins_pipe(pipe_slow);
14619 %}
14620
14621 // Compare into -1,0,1
14622 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14623 %{
14624 match(Set dst (CmpF3 src1 (LoadF src2)));
14625 effect(KILL cr);
14626
14627 ins_cost(275);
14628 format %{ "ucomiss $src1, $src2\n\t"
14629 "movl $dst, #-1\n\t"
14630 "jp,s done\n\t"
14631 "jb,s done\n\t"
14632 "setne $dst\n\t"
14633 "movzbl $dst, $dst\n"
14634 "done:" %}
14635 ins_encode %{
14636 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14637 emit_cmpfp3(masm, $dst$$Register);
14638 %}
14639 ins_pipe(pipe_slow);
14640 %}
14641
14642 // Compare into -1,0,1
14643 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14644 match(Set dst (CmpF3 src con));
14645 effect(KILL cr);
14646
14647 ins_cost(275);
14648 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14649 "movl $dst, #-1\n\t"
14650 "jp,s done\n\t"
14651 "jb,s done\n\t"
14652 "setne $dst\n\t"
14653 "movzbl $dst, $dst\n"
14654 "done:" %}
14655 ins_encode %{
14656 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14657 emit_cmpfp3(masm, $dst$$Register);
14658 %}
14659 ins_pipe(pipe_slow);
14660 %}
14661
14662 // Compare into -1,0,1
14663 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14664 %{
14665 match(Set dst (CmpD3 src1 src2));
14666 effect(KILL cr);
14667
14668 ins_cost(275);
14669 format %{ "ucomisd $src1, $src2\n\t"
14670 "movl $dst, #-1\n\t"
14671 "jp,s done\n\t"
14672 "jb,s done\n\t"
14673 "setne $dst\n\t"
14674 "movzbl $dst, $dst\n"
14675 "done:" %}
14676 ins_encode %{
14677 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14678 emit_cmpfp3(masm, $dst$$Register);
14679 %}
14680 ins_pipe(pipe_slow);
14681 %}
14682
14683 // Compare into -1,0,1
14684 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14685 %{
14686 match(Set dst (CmpD3 src1 (LoadD src2)));
14687 effect(KILL cr);
14688
14689 ins_cost(275);
14690 format %{ "ucomisd $src1, $src2\n\t"
14691 "movl $dst, #-1\n\t"
14692 "jp,s done\n\t"
14693 "jb,s done\n\t"
14694 "setne $dst\n\t"
14695 "movzbl $dst, $dst\n"
14696 "done:" %}
14697 ins_encode %{
14698 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14699 emit_cmpfp3(masm, $dst$$Register);
14700 %}
14701 ins_pipe(pipe_slow);
14702 %}
14703
14704 // Compare into -1,0,1
14705 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14706 match(Set dst (CmpD3 src con));
14707 effect(KILL cr);
14708
14709 ins_cost(275);
14710 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14711 "movl $dst, #-1\n\t"
14712 "jp,s done\n\t"
14713 "jb,s done\n\t"
14714 "setne $dst\n\t"
14715 "movzbl $dst, $dst\n"
14716 "done:" %}
14717 ins_encode %{
14718 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14719 emit_cmpfp3(masm, $dst$$Register);
14720 %}
14721 ins_pipe(pipe_slow);
14722 %}
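
// Illustrative note (assumption, not from the original source): the CmpF3/CmpD3
// rules above materialize the three-way result directly, roughly
//   int c = (unordered || a < b) ? -1 : (a == b ? 0 : 1);
// i.e. NaN inputs fall into the -1 bucket, as in the fcmpl/dcmpl flavour of the
// Java comparison.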
14723
14724 //----------Arithmetic Conversion Instructions---------------------------------
14725
14726 instruct convF2D_reg_reg(regD dst, regF src)
14727 %{
14728 match(Set dst (ConvF2D src));
14729
14730 format %{ "cvtss2sd $dst, $src" %}
14731 ins_encode %{
14732 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14733 %}
14734 ins_pipe(pipe_slow); // XXX
14735 %}
14736
14737 instruct convF2D_reg_mem(regD dst, memory src)
14738 %{
14739 predicate(UseAVX == 0);
14740 match(Set dst (ConvF2D (LoadF src)));
14741
14742 format %{ "cvtss2sd $dst, $src" %}
14743 ins_encode %{
14744 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14745 %}
14746 ins_pipe(pipe_slow); // XXX
14747 %}
14748
14749 instruct convD2F_reg_reg(regF dst, regD src)
14750 %{
14751 match(Set dst (ConvD2F src));
14752
14753 format %{ "cvtsd2ss $dst, $src" %}
14754 ins_encode %{
14755 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14756 %}
14757 ins_pipe(pipe_slow); // XXX
14758 %}
14759
14760 instruct convD2F_reg_mem(regF dst, memory src)
14761 %{
14762 predicate(UseAVX == 0);
14763 match(Set dst (ConvD2F (LoadD src)));
14764
14765 format %{ "cvtsd2ss $dst, $src" %}
14766 ins_encode %{
14767 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14768 %}
14769 ins_pipe(pipe_slow); // XXX
14770 %}
14771
14772 // XXX do mem variants
14773 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14774 %{
14775 predicate(!VM_Version::supports_avx10_2());
14776 match(Set dst (ConvF2I src));
14777 effect(KILL cr);
14778 format %{ "convert_f2i $dst, $src" %}
14779 ins_encode %{
14780 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14781 %}
14782 ins_pipe(pipe_slow);
14783 %}
14784
14785 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14786 %{
14787 predicate(VM_Version::supports_avx10_2());
14788 match(Set dst (ConvF2I src));
14789 format %{ "evcvttss2sisl $dst, $src" %}
14790 ins_encode %{
14791 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14792 %}
14793 ins_pipe(pipe_slow);
14794 %}
14795
14796 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14797 %{
14798 predicate(VM_Version::supports_avx10_2());
14799 match(Set dst (ConvF2I (LoadF src)));
14800 format %{ "evcvttss2sisl $dst, $src" %}
14801 ins_encode %{
14802 __ evcvttss2sisl($dst$$Register, $src$$Address);
14803 %}
14804 ins_pipe(pipe_slow);
14805 %}
14806
14807 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14808 %{
14809 predicate(!VM_Version::supports_avx10_2());
14810 match(Set dst (ConvF2L src));
14811 effect(KILL cr);
14812 format %{ "convert_f2l $dst, $src"%}
14813 ins_encode %{
14814 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14815 %}
14816 ins_pipe(pipe_slow);
14817 %}
14818
14819 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14820 %{
14821 predicate(VM_Version::supports_avx10_2());
14822 match(Set dst (ConvF2L src));
14823 format %{ "evcvttss2sisq $dst, $src" %}
14824 ins_encode %{
14825 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14826 %}
14827 ins_pipe(pipe_slow);
14828 %}
14829
14830 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14831 %{
14832 predicate(VM_Version::supports_avx10_2());
14833 match(Set dst (ConvF2L (LoadF src)));
14834 format %{ "evcvttss2sisq $dst, $src" %}
14835 ins_encode %{
14836 __ evcvttss2sisq($dst$$Register, $src$$Address);
14837 %}
14838 ins_pipe(pipe_slow);
14839 %}
14840
14841 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14842 %{
14843 predicate(!VM_Version::supports_avx10_2());
14844 match(Set dst (ConvD2I src));
14845 effect(KILL cr);
14846 format %{ "convert_d2i $dst, $src"%}
14847 ins_encode %{
14848 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14849 %}
14850 ins_pipe(pipe_slow);
14851 %}
14852
14853 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14854 %{
14855 predicate(VM_Version::supports_avx10_2());
14856 match(Set dst (ConvD2I src));
14857 format %{ "evcvttsd2sisl $dst, $src" %}
14858 ins_encode %{
14859 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14860 %}
14861 ins_pipe(pipe_slow);
14862 %}
14863
14864 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14865 %{
14866 predicate(VM_Version::supports_avx10_2());
14867 match(Set dst (ConvD2I (LoadD src)));
14868 format %{ "evcvttsd2sisl $dst, $src" %}
14869 ins_encode %{
14870 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14871 %}
14872 ins_pipe(pipe_slow);
14873 %}
14874
14875 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14876 %{
14877 predicate(!VM_Version::supports_avx10_2());
14878 match(Set dst (ConvD2L src));
14879 effect(KILL cr);
14880 format %{ "convert_d2l $dst, $src"%}
14881 ins_encode %{
14882 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14883 %}
14884 ins_pipe(pipe_slow);
14885 %}
14886
14887 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14888 %{
14889 predicate(VM_Version::supports_avx10_2());
14890 match(Set dst (ConvD2L src));
14891 format %{ "evcvttsd2sisq $dst, $src" %}
14892 ins_encode %{
14893 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14894 %}
14895 ins_pipe(pipe_slow);
14896 %}
14897
14898 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14899 %{
14900 predicate(VM_Version::supports_avx10_2());
14901 match(Set dst (ConvD2L (LoadD src)));
14902 format %{ "evcvttsd2sisq $dst, $src" %}
14903 ins_encode %{
14904 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14905 %}
14906 ins_pipe(pipe_slow);
14907 %}
14908
14909 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14910 %{
14911 match(Set dst (RoundD src));
14912 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14913 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14914 ins_encode %{
14915 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14916 %}
14917 ins_pipe(pipe_slow);
14918 %}
14919
14920 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14921 %{
14922 match(Set dst (RoundF src));
14923 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14924 format %{ "round_float $dst,$src" %}
14925 ins_encode %{
14926 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14927 %}
14928 ins_pipe(pipe_slow);
14929 %}
14930
14931 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14932 %{
14933 predicate(!UseXmmI2F);
14934 match(Set dst (ConvI2F src));
14935
14936 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14937 ins_encode %{
14938 if (UseAVX > 0) {
14939 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14940 }
14941 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14942 %}
14943 ins_pipe(pipe_slow); // XXX
14944 %}
14945
14946 instruct convI2F_reg_mem(regF dst, memory src)
14947 %{
14948 predicate(UseAVX == 0);
14949 match(Set dst (ConvI2F (LoadI src)));
14950
14951 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14952 ins_encode %{
14953 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14954 %}
14955 ins_pipe(pipe_slow); // XXX
14956 %}
14957
14958 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14959 %{
14960 predicate(!UseXmmI2D);
14961 match(Set dst (ConvI2D src));
14962
14963 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14964 ins_encode %{
14965 if (UseAVX > 0) {
14966 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14967 }
14968 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14969 %}
14970 ins_pipe(pipe_slow); // XXX
14971 %}
14972
14973 instruct convI2D_reg_mem(regD dst, memory src)
14974 %{
14975 predicate(UseAVX == 0);
14976 match(Set dst (ConvI2D (LoadI src)));
14977
14978 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14979 ins_encode %{
14980 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14981 %}
14982 ins_pipe(pipe_slow); // XXX
14983 %}
14984
14985 instruct convXI2F_reg(regF dst, rRegI src)
14986 %{
14987 predicate(UseXmmI2F);
14988 match(Set dst (ConvI2F src));
14989
14990 format %{ "movdl $dst, $src\n\t"
14991 "cvtdq2psl $dst, $dst\t# i2f" %}
14992 ins_encode %{
14993 __ movdl($dst$$XMMRegister, $src$$Register);
14994 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14995 %}
14996 ins_pipe(pipe_slow); // XXX
14997 %}
14998
14999 instruct convXI2D_reg(regD dst, rRegI src)
15000 %{
15001 predicate(UseXmmI2D);
15002 match(Set dst (ConvI2D src));
15003
15004 format %{ "movdl $dst, $src\n\t"
15005 "cvtdq2pdl $dst, $dst\t# i2d" %}
15006 ins_encode %{
15007 __ movdl($dst$$XMMRegister, $src$$Register);
15008 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
15009 %}
15010 ins_pipe(pipe_slow); // XXX
15011 %}
15012
15013 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
15014 %{
15015 match(Set dst (ConvL2F src));
15016
15017 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15018 ins_encode %{
15019 if (UseAVX > 0) {
15020 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15021 }
15022 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
15023 %}
15024 ins_pipe(pipe_slow); // XXX
15025 %}
15026
15027 instruct convL2F_reg_mem(regF dst, memory src)
15028 %{
15029 predicate(UseAVX == 0);
15030 match(Set dst (ConvL2F (LoadL src)));
15031
15032 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15033 ins_encode %{
15034 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
15035 %}
15036 ins_pipe(pipe_slow); // XXX
15037 %}
15038
15039 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
15040 %{
15041 match(Set dst (ConvL2D src));
15042
15043 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15044 ins_encode %{
15045 if (UseAVX > 0) {
15046 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15047 }
15048 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
15049 %}
15050 ins_pipe(pipe_slow); // XXX
15051 %}
15052
15053 instruct convL2D_reg_mem(regD dst, memory src)
15054 %{
15055 predicate(UseAVX == 0);
15056 match(Set dst (ConvL2D (LoadL src)));
15057
15058 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15059 ins_encode %{
15060 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15061 %}
15062 ins_pipe(pipe_slow); // XXX
15063 %}
15064
15065 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15066 %{
15067 match(Set dst (ConvI2L src));
15068
15069 ins_cost(125);
15070 format %{ "movslq $dst, $src\t# i2l" %}
15071 ins_encode %{
15072 __ movslq($dst$$Register, $src$$Register);
15073 %}
15074 ins_pipe(ialu_reg_reg);
15075 %}
15076
15077 // Zero-extend convert int to long
15078 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15079 %{
15080 match(Set dst (AndL (ConvI2L src) mask));
15081
15082 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15083 ins_encode %{
15084 if ($dst$$reg != $src$$reg) {
15085 __ movl($dst$$Register, $src$$Register);
15086 }
15087 %}
15088 ins_pipe(ialu_reg_reg);
15089 %}
15090
15091 // Zero-extend convert int to long
15092 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15093 %{
15094 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15095
15096 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15097 ins_encode %{
15098 __ movl($dst$$Register, $src$$Address);
15099 %}
15100 ins_pipe(ialu_reg_mem);
15101 %}
15102
15103 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15104 %{
15105 match(Set dst (AndL src mask));
15106
15107 format %{ "movl $dst, $src\t# zero-extend long" %}
15108 ins_encode %{
15109 __ movl($dst$$Register, $src$$Register);
15110 %}
15111 ins_pipe(ialu_reg_reg);
15112 %}
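
// Illustrative note (not from the original source): on x86-64 a 32-bit move
// implicitly zeroes the upper half, so the three zero-extension rules above
// cover idioms such as
//   long u = i & 0xFFFFFFFFL;  // e.g. Integer.toUnsignedLong(i)
// with a single movl.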
15113
15114 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15115 %{
15116 match(Set dst (ConvL2I src));
15117
15118 format %{ "movl $dst, $src\t# l2i" %}
15119 ins_encode %{
15120 __ movl($dst$$Register, $src$$Register);
15121 %}
15122 ins_pipe(ialu_reg_reg);
15123 %}
15124
15125
15126 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15127 match(Set dst (MoveF2I src));
15128 effect(DEF dst, USE src);
15129
15130 ins_cost(125);
15131 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15132 ins_encode %{
15133 __ movl($dst$$Register, Address(rsp, $src$$disp));
15134 %}
15135 ins_pipe(ialu_reg_mem);
15136 %}
15137
15138 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15139 match(Set dst (MoveI2F src));
15140 effect(DEF dst, USE src);
15141
15142 ins_cost(125);
15143 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15144 ins_encode %{
15145 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15146 %}
15147 ins_pipe(pipe_slow);
15148 %}
15149
15150 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15151 match(Set dst (MoveD2L src));
15152 effect(DEF dst, USE src);
15153
15154 ins_cost(125);
15155 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15156 ins_encode %{
15157 __ movq($dst$$Register, Address(rsp, $src$$disp));
15158 %}
15159 ins_pipe(ialu_reg_mem);
15160 %}
15161
15162 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15163 predicate(!UseXmmLoadAndClearUpper);
15164 match(Set dst (MoveL2D src));
15165 effect(DEF dst, USE src);
15166
15167 ins_cost(125);
15168 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15169 ins_encode %{
15170 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15171 %}
15172 ins_pipe(pipe_slow);
15173 %}
15174
15175 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15176 predicate(UseXmmLoadAndClearUpper);
15177 match(Set dst (MoveL2D src));
15178 effect(DEF dst, USE src);
15179
15180 ins_cost(125);
15181 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15182 ins_encode %{
15183 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15184 %}
15185 ins_pipe(pipe_slow);
15186 %}
15187
15188
15189 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15190 match(Set dst (MoveF2I src));
15191 effect(DEF dst, USE src);
15192
15193 ins_cost(95); // XXX
15194 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15195 ins_encode %{
15196 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15197 %}
15198 ins_pipe(pipe_slow);
15199 %}
15200
15201 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15202 match(Set dst (MoveI2F src));
15203 effect(DEF dst, USE src);
15204
15205 ins_cost(100);
15206 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15207 ins_encode %{
15208 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15209 %}
15210 ins_pipe( ialu_mem_reg );
15211 %}
15212
15213 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15214 match(Set dst (MoveD2L src));
15215 effect(DEF dst, USE src);
15216
15217 ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
15219 ins_encode %{
15220 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15221 %}
15222 ins_pipe(pipe_slow);
15223 %}
15224
15225 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15226 match(Set dst (MoveL2D src));
15227 effect(DEF dst, USE src);
15228
15229 ins_cost(100);
15230 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15231 ins_encode %{
15232 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15233 %}
15234 ins_pipe(ialu_mem_reg);
15235 %}
15236
15237 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15238 match(Set dst (MoveF2I src));
15239 effect(DEF dst, USE src);
15240 ins_cost(85);
15241 format %{ "movd $dst,$src\t# MoveF2I" %}
15242 ins_encode %{
15243 __ movdl($dst$$Register, $src$$XMMRegister);
15244 %}
15245 ins_pipe( pipe_slow );
15246 %}
15247
15248 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15249 match(Set dst (MoveD2L src));
15250 effect(DEF dst, USE src);
15251 ins_cost(85);
15252 format %{ "movd $dst,$src\t# MoveD2L" %}
15253 ins_encode %{
15254 __ movdq($dst$$Register, $src$$XMMRegister);
15255 %}
15256 ins_pipe( pipe_slow );
15257 %}
15258
15259 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15260 match(Set dst (MoveI2F src));
15261 effect(DEF dst, USE src);
15262 ins_cost(100);
15263 format %{ "movd $dst,$src\t# MoveI2F" %}
15264 ins_encode %{
15265 __ movdl($dst$$XMMRegister, $src$$Register);
15266 %}
15267 ins_pipe( pipe_slow );
15268 %}
15269
15270 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15271 match(Set dst (MoveL2D src));
15272 effect(DEF dst, USE src);
15273 ins_cost(100);
15274 format %{ "movd $dst,$src\t# MoveL2D" %}
15275 ins_encode %{
15276 __ movdq($dst$$XMMRegister, $src$$Register);
15277 %}
15278 ins_pipe( pipe_slow );
15279 %}
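
// Illustrative note (assumption, not from the original source): the MoveF2I /
// MoveI2F / MoveD2L / MoveL2D rules above are the raw bit-cast moves typically
// produced for intrinsics such as
//   int bits = Float.floatToRawIntBits(f);
//   double d = Double.longBitsToDouble(l);
// routed either through a stack slot or directly between GPR and XMM registers.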
15280
15281
15282 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15284 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15285 Universe dummy, rFlagsReg cr)
15286 %{
15287 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15288 match(Set dummy (ClearArray (Binary cnt base) val));
15289 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15290
15291 format %{ $$template
15292 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15293 $$emit$$"jg LARGE\n\t"
15294 $$emit$$"dec rcx\n\t"
15295 $$emit$$"js DONE\t# Zero length\n\t"
15296 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15297 $$emit$$"dec rcx\n\t"
15298 $$emit$$"jge LOOP\n\t"
15299 $$emit$$"jmp DONE\n\t"
15300 $$emit$$"# LARGE:\n\t"
15301 if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15303 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15304 } else if (UseXMMForObjInit) {
15305 $$emit$$"movdq $tmp, $val\n\t"
15306 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15307 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15308 $$emit$$"jmpq L_zero_64_bytes\n\t"
15309 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15310 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15311 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15312 $$emit$$"add 0x40,rax\n\t"
15313 $$emit$$"# L_zero_64_bytes:\n\t"
15314 $$emit$$"sub 0x8,rcx\n\t"
15315 $$emit$$"jge L_loop\n\t"
15316 $$emit$$"add 0x4,rcx\n\t"
15317 $$emit$$"jl L_tail\n\t"
15318 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15319 $$emit$$"add 0x20,rax\n\t"
15320 $$emit$$"sub 0x4,rcx\n\t"
15321 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15322 $$emit$$"add 0x4,rcx\n\t"
15323 $$emit$$"jle L_end\n\t"
15324 $$emit$$"dec rcx\n\t"
15325 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15326 $$emit$$"vmovq xmm0,(rax)\n\t"
15327 $$emit$$"add 0x8,rax\n\t"
15328 $$emit$$"dec rcx\n\t"
15329 $$emit$$"jge L_sloop\n\t"
15330 $$emit$$"# L_end:\n\t"
15331 } else {
15332 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15333 }
15334 $$emit$$"# DONE"
15335 %}
15336 ins_encode %{
15337 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15338 $tmp$$XMMRegister, false, false);
15339 %}
15340 ins_pipe(pipe_slow);
15341 %}
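
// Illustrative note (assumption, not from the original source): the ClearArray
// rules in this group fill the body of a freshly allocated object or array with
// the 64-bit pattern held in rax (normally zero), with the count in rcx given
// in 8-byte words, e.g. the zeroing part of
//   long[] a = new long[n];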
15342
15343 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15344 Universe dummy, rFlagsReg cr)
15345 %{
15346 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15347 match(Set dummy (ClearArray (Binary cnt base) val));
15348 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15349
15350 format %{ $$template
15351 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15352 $$emit$$"jg LARGE\n\t"
15353 $$emit$$"dec rcx\n\t"
15354 $$emit$$"js DONE\t# Zero length\n\t"
15355 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15356 $$emit$$"dec rcx\n\t"
15357 $$emit$$"jge LOOP\n\t"
15358 $$emit$$"jmp DONE\n\t"
15359 $$emit$$"# LARGE:\n\t"
15360 if (UseXMMForObjInit) {
15361 $$emit$$"movdq $tmp, $val\n\t"
15362 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15363 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15364 $$emit$$"jmpq L_zero_64_bytes\n\t"
15365 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15366 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15367 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15368 $$emit$$"add 0x40,rax\n\t"
15369 $$emit$$"# L_zero_64_bytes:\n\t"
15370 $$emit$$"sub 0x8,rcx\n\t"
15371 $$emit$$"jge L_loop\n\t"
15372 $$emit$$"add 0x4,rcx\n\t"
15373 $$emit$$"jl L_tail\n\t"
15374 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15375 $$emit$$"add 0x20,rax\n\t"
15376 $$emit$$"sub 0x4,rcx\n\t"
15377 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15378 $$emit$$"add 0x4,rcx\n\t"
15379 $$emit$$"jle L_end\n\t"
15380 $$emit$$"dec rcx\n\t"
15381 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15382 $$emit$$"vmovq xmm0,(rax)\n\t"
15383 $$emit$$"add 0x8,rax\n\t"
15384 $$emit$$"dec rcx\n\t"
15385 $$emit$$"jge L_sloop\n\t"
15386 $$emit$$"# L_end:\n\t"
15387 } else {
15388 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15389 }
15390 $$emit$$"# DONE"
15391 %}
15392 ins_encode %{
15393 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15394 $tmp$$XMMRegister, false, true);
15395 %}
15396 ins_pipe(pipe_slow);
15397 %}
15398
15399 // Small non-constant length ClearArray for AVX512 targets.
15400 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15401 Universe dummy, rFlagsReg cr)
15402 %{
15403 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15404 match(Set dummy (ClearArray (Binary cnt base) val));
15405 ins_cost(125);
15406 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15407
15408 format %{ $$template
15409 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15410 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15411 $$emit$$"jg LARGE\n\t"
15412 $$emit$$"dec rcx\n\t"
15413 $$emit$$"js DONE\t# Zero length\n\t"
15414 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15415 $$emit$$"dec rcx\n\t"
15416 $$emit$$"jge LOOP\n\t"
15417 $$emit$$"jmp DONE\n\t"
15418 $$emit$$"# LARGE:\n\t"
15419 if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15421 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15422 } else if (UseXMMForObjInit) {
15423 $$emit$$"mov rdi,rax\n\t"
15424 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15425 $$emit$$"jmpq L_zero_64_bytes\n\t"
15426 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15427 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15428 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15429 $$emit$$"add 0x40,rax\n\t"
15430 $$emit$$"# L_zero_64_bytes:\n\t"
15431 $$emit$$"sub 0x8,rcx\n\t"
15432 $$emit$$"jge L_loop\n\t"
15433 $$emit$$"add 0x4,rcx\n\t"
15434 $$emit$$"jl L_tail\n\t"
15435 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15436 $$emit$$"add 0x20,rax\n\t"
15437 $$emit$$"sub 0x4,rcx\n\t"
15438 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15439 $$emit$$"add 0x4,rcx\n\t"
15440 $$emit$$"jle L_end\n\t"
15441 $$emit$$"dec rcx\n\t"
15442 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15443 $$emit$$"vmovq xmm0,(rax)\n\t"
15444 $$emit$$"add 0x8,rax\n\t"
15445 $$emit$$"dec rcx\n\t"
15446 $$emit$$"jge L_sloop\n\t"
15447 $$emit$$"# L_end:\n\t"
15448 } else {
15449 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15450 }
15451 $$emit$$"# DONE"
15452 %}
15453 ins_encode %{
15454 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15455 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15456 %}
15457 ins_pipe(pipe_slow);
15458 %}
15459
15460 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15461 Universe dummy, rFlagsReg cr)
15462 %{
15463 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15464 match(Set dummy (ClearArray (Binary cnt base) val));
15465 ins_cost(125);
15466 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15467
15468 format %{ $$template
15469 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15470 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15471 $$emit$$"jg LARGE\n\t"
15472 $$emit$$"dec rcx\n\t"
15473 $$emit$$"js DONE\t# Zero length\n\t"
15474 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15475 $$emit$$"dec rcx\n\t"
15476 $$emit$$"jge LOOP\n\t"
15477 $$emit$$"jmp DONE\n\t"
15478 $$emit$$"# LARGE:\n\t"
15479 if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15481 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15482 } else if (UseXMMForObjInit) {
15483 $$emit$$"mov rdi,rax\n\t"
15484 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15485 $$emit$$"jmpq L_zero_64_bytes\n\t"
15486 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15487 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15488 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15489 $$emit$$"add 0x40,rax\n\t"
15490 $$emit$$"# L_zero_64_bytes:\n\t"
15491 $$emit$$"sub 0x8,rcx\n\t"
15492 $$emit$$"jge L_loop\n\t"
15493 $$emit$$"add 0x4,rcx\n\t"
15494 $$emit$$"jl L_tail\n\t"
15495 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15496 $$emit$$"add 0x20,rax\n\t"
15497 $$emit$$"sub 0x4,rcx\n\t"
15498 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15499 $$emit$$"add 0x4,rcx\n\t"
15500 $$emit$$"jle L_end\n\t"
15501 $$emit$$"dec rcx\n\t"
15502 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15503 $$emit$$"vmovq xmm0,(rax)\n\t"
15504 $$emit$$"add 0x8,rax\n\t"
15505 $$emit$$"dec rcx\n\t"
15506 $$emit$$"jge L_sloop\n\t"
15507 $$emit$$"# L_end:\n\t"
15508 } else {
15509 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15510 }
15511 $$emit$$"# DONE"
15512 %}
15513 ins_encode %{
15514 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15515 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15516 %}
15517 ins_pipe(pipe_slow);
15518 %}
15519
15520 // Large non-constant length ClearArray for non-AVX512 targets.
15521 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15522 Universe dummy, rFlagsReg cr)
15523 %{
15524 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15525 match(Set dummy (ClearArray (Binary cnt base) val));
15526 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15527
15528 format %{ $$template
15529 if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15531 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15532 } else if (UseXMMForObjInit) {
15533 $$emit$$"movdq $tmp, $val\n\t"
15534 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15535 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15536 $$emit$$"jmpq L_zero_64_bytes\n\t"
15537 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15538 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15539 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15540 $$emit$$"add 0x40,rax\n\t"
15541 $$emit$$"# L_zero_64_bytes:\n\t"
15542 $$emit$$"sub 0x8,rcx\n\t"
15543 $$emit$$"jge L_loop\n\t"
15544 $$emit$$"add 0x4,rcx\n\t"
15545 $$emit$$"jl L_tail\n\t"
15546 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15547 $$emit$$"add 0x20,rax\n\t"
15548 $$emit$$"sub 0x4,rcx\n\t"
15549 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15550 $$emit$$"add 0x4,rcx\n\t"
15551 $$emit$$"jle L_end\n\t"
15552 $$emit$$"dec rcx\n\t"
15553 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15554 $$emit$$"vmovq xmm0,(rax)\n\t"
15555 $$emit$$"add 0x8,rax\n\t"
15556 $$emit$$"dec rcx\n\t"
15557 $$emit$$"jge L_sloop\n\t"
15558 $$emit$$"# L_end:\n\t"
15559 } else {
15560 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15561 }
15562 %}
15563 ins_encode %{
15564 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15565 $tmp$$XMMRegister, true, false);
15566 %}
15567 ins_pipe(pipe_slow);
15568 %}
15569
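// Large non-constant length ClearArray, word-copy-only form, for non-AVX512 targets.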
15570 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15571 Universe dummy, rFlagsReg cr)
15572 %{
15573 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15574 match(Set dummy (ClearArray (Binary cnt base) val));
15575 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15576
15577 format %{ $$template
15578 if (UseXMMForObjInit) {
15579 $$emit$$"movdq $tmp, $val\n\t"
15580 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15581 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15582 $$emit$$"jmpq L_zero_64_bytes\n\t"
15583 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15584 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15585 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15586 $$emit$$"add 0x40,rax\n\t"
15587 $$emit$$"# L_zero_64_bytes:\n\t"
15588 $$emit$$"sub 0x8,rcx\n\t"
15589 $$emit$$"jge L_loop\n\t"
15590 $$emit$$"add 0x4,rcx\n\t"
15591 $$emit$$"jl L_tail\n\t"
15592 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15593 $$emit$$"add 0x20,rax\n\t"
15594 $$emit$$"sub 0x4,rcx\n\t"
15595 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15596 $$emit$$"add 0x4,rcx\n\t"
15597 $$emit$$"jle L_end\n\t"
15598 $$emit$$"dec rcx\n\t"
15599 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15600 $$emit$$"vmovq xmm0,(rax)\n\t"
15601 $$emit$$"add 0x8,rax\n\t"
15602 $$emit$$"dec rcx\n\t"
15603 $$emit$$"jge L_sloop\n\t"
15604 $$emit$$"# L_end:\n\t"
15605 } else {
15606 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15607 }
15608 %}
15609 ins_encode %{
15610 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15611 $tmp$$XMMRegister, true, true);
15612 %}
15613 ins_pipe(pipe_slow);
15614 %}
15615
15616 // Large non-constant length ClearArray for AVX512 targets.
15617 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15618 Universe dummy, rFlagsReg cr)
15619 %{
15620 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15621 match(Set dummy (ClearArray (Binary cnt base) val));
15622 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15623
15624 format %{ $$template
15625 if (UseFastStosb) {
15626 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15628 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15629 } else if (UseXMMForObjInit) {
15630 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15631 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15632 $$emit$$"jmpq L_zero_64_bytes\n\t"
15633 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15634 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15635 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15636 $$emit$$"add 0x40,rax\n\t"
15637 $$emit$$"# L_zero_64_bytes:\n\t"
15638 $$emit$$"sub 0x8,rcx\n\t"
15639 $$emit$$"jge L_loop\n\t"
15640 $$emit$$"add 0x4,rcx\n\t"
15641 $$emit$$"jl L_tail\n\t"
15642 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15643 $$emit$$"add 0x20,rax\n\t"
15644 $$emit$$"sub 0x4,rcx\n\t"
15645 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15646 $$emit$$"add 0x4,rcx\n\t"
15647 $$emit$$"jle L_end\n\t"
15648 $$emit$$"dec rcx\n\t"
15649 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15650 $$emit$$"vmovq xmm0,(rax)\n\t"
15651 $$emit$$"add 0x8,rax\n\t"
15652 $$emit$$"dec rcx\n\t"
15653 $$emit$$"jge L_sloop\n\t"
15654 $$emit$$"# L_end:\n\t"
15655 } else {
15656 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15657 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15658 }
15659 %}
15660 ins_encode %{
15661 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15662 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15663 %}
15664 ins_pipe(pipe_slow);
15665 %}
15666
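// Large non-constant length ClearArray, word-copy-only form, for AVX512 targets.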
15667 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15668 Universe dummy, rFlagsReg cr)
15669 %{
15670 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15671 match(Set dummy (ClearArray (Binary cnt base) val));
15672 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15673
15674 format %{ $$template
15675 if (UseFastStosb) {
15676 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15678 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15679 } else if (UseXMMForObjInit) {
15680 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15681 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15682 $$emit$$"jmpq L_zero_64_bytes\n\t"
15683 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15684 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15685 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15686 $$emit$$"add 0x40,rax\n\t"
15687 $$emit$$"# L_zero_64_bytes:\n\t"
15688 $$emit$$"sub 0x8,rcx\n\t"
15689 $$emit$$"jge L_loop\n\t"
15690 $$emit$$"add 0x4,rcx\n\t"
15691 $$emit$$"jl L_tail\n\t"
15692 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15693 $$emit$$"add 0x20,rax\n\t"
15694 $$emit$$"sub 0x4,rcx\n\t"
15695 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15696 $$emit$$"add 0x4,rcx\n\t"
15697 $$emit$$"jle L_end\n\t"
15698 $$emit$$"dec rcx\n\t"
15699 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15700 $$emit$$"vmovq xmm0,(rax)\n\t"
15701 $$emit$$"add 0x8,rax\n\t"
15702 $$emit$$"dec rcx\n\t"
15703 $$emit$$"jge L_sloop\n\t"
15704 $$emit$$"# L_end:\n\t"
15705 } else {
15706 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15707 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15708 }
15709 %}
15710 ins_encode %{
15711 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15712 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15713 %}
15714 ins_pipe(pipe_slow);
15715 %}
15716
15717 // Small constant length ClearArray for AVX512 targets.
15718 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15719 %{
15720 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15721 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15722 match(Set dummy (ClearArray (Binary cnt base) val));
15723 ins_cost(100);
15724 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt\n\t" %}
15726 ins_encode %{
15727 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15728 %}
15729 ins_pipe(pipe_slow);
15730 %}
15731
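// String compare intrinsics. The suffix encodes the operand encodings
// (L = Latin-1, U = UTF-16); the _evex variants require AVX-512 VL+BW and
// take an opmask (kReg) temporary, while the plain variants pass knoreg.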
15732 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15733 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15734 %{
15735 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15736 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15737 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15738
15739 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15740 ins_encode %{
15741 __ string_compare($str1$$Register, $str2$$Register,
15742 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15743 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15744 %}
15745 ins_pipe( pipe_slow );
15746 %}
15747
15748 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15749 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15750 %{
15751 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15752 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15753 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15754
15755 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15756 ins_encode %{
15757 __ string_compare($str1$$Register, $str2$$Register,
15758 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15759 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15760 %}
15761 ins_pipe( pipe_slow );
15762 %}
15763
15764 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15765 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15766 %{
15767 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15768 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15769 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15770
15771 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15772 ins_encode %{
15773 __ string_compare($str1$$Register, $str2$$Register,
15774 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15775 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15776 %}
15777 ins_pipe( pipe_slow );
15778 %}
15779
15780 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15781 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15782 %{
15783 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15784 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15785 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15786
15787 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15788 ins_encode %{
15789 __ string_compare($str1$$Register, $str2$$Register,
15790 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15791 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15792 %}
15793 ins_pipe( pipe_slow );
15794 %}
15795
15796 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15797 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15798 %{
15799 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15800 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15801 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15802
15803 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15804 ins_encode %{
15805 __ string_compare($str1$$Register, $str2$$Register,
15806 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15807 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15808 %}
15809 ins_pipe( pipe_slow );
15810 %}
15811
15812 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15813 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15814 %{
15815 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15816 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15817 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15818
15819 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15820 ins_encode %{
15821 __ string_compare($str1$$Register, $str2$$Register,
15822 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15823 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15824 %}
15825 ins_pipe( pipe_slow );
15826 %}
15827
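// Note: for the UL encoding the operands are handed to string_compare in
// swapped order (str2/cnt2 first), here and in the _evex variant below.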
15828 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15829 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15830 %{
15831 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15832 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15833 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15834
15835 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15836 ins_encode %{
15837 __ string_compare($str2$$Register, $str1$$Register,
15838 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15839 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15840 %}
15841 ins_pipe( pipe_slow );
15842 %}
15843
15844 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15845 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15846 %{
15847 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15848 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15849 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15850
15851 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15852 ins_encode %{
15853 __ string_compare($str2$$Register, $str1$$Register,
15854 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15855 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15856 %}
15857 ins_pipe( pipe_slow );
15858 %}
15859
15860 // fast search of substring with known size.
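// In the constant-count variants below, a substring of >= 16 Latin-1 bytes or
// >= 8 UTF-16 chars (i.e. one full 16-byte XMM register) is handled by
// string_indexofC8 and never needs to be loaded through the stack; shorter
// constants fall back to the general string_indexof path.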
15861 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15862 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15863 %{
15864 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15865 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15866 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15867
15868 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15869 ins_encode %{
15870 int icnt2 = (int)$int_cnt2$$constant;
15871 if (icnt2 >= 16) {
15872 // IndexOf for constant substrings with size >= 16 elements
15873 // which don't need to be loaded through stack.
15874 __ string_indexofC8($str1$$Register, $str2$$Register,
15875 $cnt1$$Register, $cnt2$$Register,
15876 icnt2, $result$$Register,
15877 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15878 } else {
15879 // Small strings are loaded through stack if they cross page boundary.
15880 __ string_indexof($str1$$Register, $str2$$Register,
15881 $cnt1$$Register, $cnt2$$Register,
15882 icnt2, $result$$Register,
15883 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15884 }
15885 %}
15886 ins_pipe( pipe_slow );
15887 %}
15888
15889 // fast search of substring with known size.
15890 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15891 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15892 %{
15893 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15894 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15895 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15896
15897 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15898 ins_encode %{
15899 int icnt2 = (int)$int_cnt2$$constant;
15900 if (icnt2 >= 8) {
15901 // IndexOf for constant substrings with size >= 8 elements
15902 // which don't need to be loaded through stack.
15903 __ string_indexofC8($str1$$Register, $str2$$Register,
15904 $cnt1$$Register, $cnt2$$Register,
15905 icnt2, $result$$Register,
15906 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15907 } else {
15908 // Small strings are loaded through stack if they cross page boundary.
15909 __ string_indexof($str1$$Register, $str2$$Register,
15910 $cnt1$$Register, $cnt2$$Register,
15911 icnt2, $result$$Register,
15912 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15913 }
15914 %}
15915 ins_pipe( pipe_slow );
15916 %}
15917
15918 // fast search of substring with known size.
15919 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15920 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15921 %{
15922 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15923 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15924 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15925
15926 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15927 ins_encode %{
15928 int icnt2 = (int)$int_cnt2$$constant;
15929 if (icnt2 >= 8) {
15930 // IndexOf for constant substrings with size >= 8 elements
15931 // which don't need to be loaded through stack.
15932 __ string_indexofC8($str1$$Register, $str2$$Register,
15933 $cnt1$$Register, $cnt2$$Register,
15934 icnt2, $result$$Register,
15935 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15936 } else {
15937 // Small strings are loaded through stack if they cross page boundary.
15938 __ string_indexof($str1$$Register, $str2$$Register,
15939 $cnt1$$Register, $cnt2$$Register,
15940 icnt2, $result$$Register,
15941 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15942 }
15943 %}
15944 ins_pipe( pipe_slow );
15945 %}
15946
15947 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15948 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15949 %{
15950 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15951 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15952 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15953
15954 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15955 ins_encode %{
15956 __ string_indexof($str1$$Register, $str2$$Register,
15957 $cnt1$$Register, $cnt2$$Register,
15958 (-1), $result$$Register,
15959 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15960 %}
15961 ins_pipe( pipe_slow );
15962 %}
15963
15964 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15965 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15966 %{
15967 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15968 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15969 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15970
15971 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15972 ins_encode %{
15973 __ string_indexof($str1$$Register, $str2$$Register,
15974 $cnt1$$Register, $cnt2$$Register,
15975 (-1), $result$$Register,
15976 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15977 %}
15978 ins_pipe( pipe_slow );
15979 %}
15980
15981 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15982 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15983 %{
15984 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15985 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15986 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15987
15988 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15989 ins_encode %{
15990 __ string_indexof($str1$$Register, $str2$$Register,
15991 $cnt1$$Register, $cnt2$$Register,
15992 (-1), $result$$Register,
15993 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15994 %}
15995 ins_pipe( pipe_slow );
15996 %}
15997
15998 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15999 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16000 %{
16001 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
16002 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16003 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16004 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
16005 ins_encode %{
16006 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16007 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16008 %}
16009 ins_pipe( pipe_slow );
16010 %}
16011
16012 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16013 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16014 %{
16015 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
16016 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16017 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf byte[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16019 ins_encode %{
16020 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16021 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16022 %}
16023 ins_pipe( pipe_slow );
16024 %}
16025
16026 // fast string equals
16027 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16028 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16029 %{
16030 predicate(!VM_Version::supports_avx512vlbw());
16031 match(Set result (StrEquals (Binary str1 str2) cnt));
16032 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16033
16034 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16035 ins_encode %{
16036 __ arrays_equals(false, $str1$$Register, $str2$$Register,
16037 $cnt$$Register, $result$$Register, $tmp3$$Register,
16038 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16039 %}
16040 ins_pipe( pipe_slow );
16041 %}
16042
16043 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16044 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
16045 %{
16046 predicate(VM_Version::supports_avx512vlbw());
16047 match(Set result (StrEquals (Binary str1 str2) cnt));
16048 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16049
16050 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16051 ins_encode %{
16052 __ arrays_equals(false, $str1$$Register, $str2$$Register,
16053 $cnt$$Register, $result$$Register, $tmp3$$Register,
16054 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16055 %}
16056 ins_pipe( pipe_slow );
16057 %}
16058
16059 // fast array equals
16060 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16061 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16062 %{
16063 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16064 match(Set result (AryEq ary1 ary2));
16065 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16066
16067 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16068 ins_encode %{
16069 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16070 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16071 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16072 %}
16073 ins_pipe( pipe_slow );
16074 %}
16075
16076 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16077 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16078 %{
16079 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16080 match(Set result (AryEq ary1 ary2));
16081 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16082
16083 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16084 ins_encode %{
16085 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16086 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16087 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16088 %}
16089 ins_pipe( pipe_slow );
16090 %}
16091
16092 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16093 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16094 %{
16095 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16096 match(Set result (AryEq ary1 ary2));
16097 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16098
16099 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16100 ins_encode %{
16101 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16102 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16103 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
16104 %}
16105 ins_pipe( pipe_slow );
16106 %}
16107
16108 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16109 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16110 %{
16111 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16112 match(Set result (AryEq ary1 ary2));
16113 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16114
16115 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16116 ins_encode %{
16117 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16118 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16119 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16120 %}
16121 ins_pipe( pipe_slow );
16122 %}
16123
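// Vectorized array hash code (requires AVX2). basic_type is an immediate
// selecting the element type; the thirteen vector temps and three integer
// temps are scratch registers for MacroAssembler::arrays_hashcode.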
16124 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16125 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16126 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16127 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16128 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16129 %{
16130 predicate(UseAVX >= 2);
16131 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16132 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16133 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16134 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16135 USE basic_type, KILL cr);
16136
16137 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
16138 ins_encode %{
16139 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16140 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16141 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16142 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16143 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16144 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16145 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16146 %}
16147 ins_pipe( pipe_slow );
16148 %}
16149
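// count_positives, string compress and string inflate each come in two
// flavors: the _evex forms require AVX-512 VL+BW plus BMI2 and take opmask
// temporaries, while the plain forms pass knoreg instead.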
16150 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16152 %{
16153 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16154 match(Set result (CountPositives ary1 len));
16155 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16156
16157 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16158 ins_encode %{
16159 __ count_positives($ary1$$Register, $len$$Register,
16160 $result$$Register, $tmp3$$Register,
16161 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16162 %}
16163 ins_pipe( pipe_slow );
16164 %}
16165
16166 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16168 %{
16169 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16170 match(Set result (CountPositives ary1 len));
16171 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16172
16173 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16174 ins_encode %{
16175 __ count_positives($ary1$$Register, $len$$Register,
16176 $result$$Register, $tmp3$$Register,
16177 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16178 %}
16179 ins_pipe( pipe_slow );
16180 %}
16181
16182 // fast char[] to byte[] compression
16183 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16184 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16185 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16186 match(Set result (StrCompressedCopy src (Binary dst len)));
16187 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16188 USE_KILL len, KILL tmp5, KILL cr);
16189
16190 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16191 ins_encode %{
16192 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16193 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16194 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16195 knoreg, knoreg);
16196 %}
16197 ins_pipe( pipe_slow );
16198 %}
16199
16200 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16201 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16202 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16203 match(Set result (StrCompressedCopy src (Binary dst len)));
16204 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16205 USE_KILL len, KILL tmp5, KILL cr);
16206
16207 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16208 ins_encode %{
16209 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16210 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16211 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16212 $ktmp1$$KRegister, $ktmp2$$KRegister);
16213 %}
16214 ins_pipe( pipe_slow );
16215 %}

// fast byte[] to char[] inflation
16217 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16218 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16219 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16220 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16221 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16222
16223 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16224 ins_encode %{
16225 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16226 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16227 %}
16228 ins_pipe( pipe_slow );
16229 %}
16230
16231 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16232 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16233 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16234 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16235 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16236
16237 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16238 ins_encode %{
16239 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16240 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16241 %}
16242 ins_pipe( pipe_slow );
16243 %}
16244
16245 // encode char[] to byte[] in ISO_8859_1
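// (The ASCII variant below reuses the same encode_iso_array routine with the
// trailing boolean set, presumably cutting off at the first char >= 0x80
// rather than >= 0x100.)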
16246 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16247 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16248 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16249 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16250 match(Set result (EncodeISOArray src (Binary dst len)));
16251 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16252
16253 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16254 ins_encode %{
16255 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16256 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16257 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16258 %}
16259 ins_pipe( pipe_slow );
16260 %}
16261
16262 // encode char[] to byte[] in ASCII
16263 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16264 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16265 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16266 predicate(((EncodeISOArrayNode*)n)->is_ascii());
16267 match(Set result (EncodeISOArray src (Binary dst len)));
16268 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16269
16270 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16271 ins_encode %{
16272 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16273 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16274 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16275 %}
16276 ins_pipe( pipe_slow );
16277 %}
16278
16279 //----------Overflow Math Instructions-----------------------------------------
16280
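// These produce only a flags result, consumed by a later branch or cmove.
// The forms that overwrite an input (addl/addq, negl/negq, and the reg-reg
// imul) USE_KILL it; the compare-based subtract checks and the immediate-form
// multiplies (which target a TEMP) leave their inputs untouched.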
16281 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16282 %{
16283 match(Set cr (OverflowAddI op1 op2));
16284 effect(DEF cr, USE_KILL op1, USE op2);
16285
16286 format %{ "addl $op1, $op2\t# overflow check int" %}
16287
16288 ins_encode %{
16289 __ addl($op1$$Register, $op2$$Register);
16290 %}
16291 ins_pipe(ialu_reg_reg);
16292 %}
16293
16294 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16295 %{
16296 match(Set cr (OverflowAddI op1 op2));
16297 effect(DEF cr, USE_KILL op1, USE op2);
16298
16299 format %{ "addl $op1, $op2\t# overflow check int" %}
16300
16301 ins_encode %{
16302 __ addl($op1$$Register, $op2$$constant);
16303 %}
16304 ins_pipe(ialu_reg_reg);
16305 %}
16306
16307 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16308 %{
16309 match(Set cr (OverflowAddL op1 op2));
16310 effect(DEF cr, USE_KILL op1, USE op2);
16311
16312 format %{ "addq $op1, $op2\t# overflow check long" %}
16313 ins_encode %{
16314 __ addq($op1$$Register, $op2$$Register);
16315 %}
16316 ins_pipe(ialu_reg_reg);
16317 %}
16318
16319 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16320 %{
16321 match(Set cr (OverflowAddL op1 op2));
16322 effect(DEF cr, USE_KILL op1, USE op2);
16323
16324 format %{ "addq $op1, $op2\t# overflow check long" %}
16325 ins_encode %{
16326 __ addq($op1$$Register, $op2$$constant);
16327 %}
16328 ins_pipe(ialu_reg_reg);
16329 %}
16330
16331 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16332 %{
16333 match(Set cr (OverflowSubI op1 op2));
16334
16335 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16336 ins_encode %{
16337 __ cmpl($op1$$Register, $op2$$Register);
16338 %}
16339 ins_pipe(ialu_reg_reg);
16340 %}
16341
16342 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16343 %{
16344 match(Set cr (OverflowSubI op1 op2));
16345
16346 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16347 ins_encode %{
16348 __ cmpl($op1$$Register, $op2$$constant);
16349 %}
16350 ins_pipe(ialu_reg_reg);
16351 %}
16352
16353 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16354 %{
16355 match(Set cr (OverflowSubL op1 op2));
16356
16357 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16358 ins_encode %{
16359 __ cmpq($op1$$Register, $op2$$Register);
16360 %}
16361 ins_pipe(ialu_reg_reg);
16362 %}
16363
16364 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16365 %{
16366 match(Set cr (OverflowSubL op1 op2));
16367
16368 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16369 ins_encode %{
16370 __ cmpq($op1$$Register, $op2$$constant);
16371 %}
16372 ins_pipe(ialu_reg_reg);
16373 %}
16374
16375 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16376 %{
16377 match(Set cr (OverflowSubI zero op2));
16378 effect(DEF cr, USE_KILL op2);
16379
16380 format %{ "negl $op2\t# overflow check int" %}
16381 ins_encode %{
16382 __ negl($op2$$Register);
16383 %}
16384 ins_pipe(ialu_reg_reg);
16385 %}
16386
16387 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16388 %{
16389 match(Set cr (OverflowSubL zero op2));
16390 effect(DEF cr, USE_KILL op2);
16391
16392 format %{ "negq $op2\t# overflow check long" %}
16393 ins_encode %{
16394 __ negq($op2$$Register);
16395 %}
16396 ins_pipe(ialu_reg_reg);
16397 %}
16398
16399 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16400 %{
16401 match(Set cr (OverflowMulI op1 op2));
16402 effect(DEF cr, USE_KILL op1, USE op2);
16403
16404 format %{ "imull $op1, $op2\t# overflow check int" %}
16405 ins_encode %{
16406 __ imull($op1$$Register, $op2$$Register);
16407 %}
16408 ins_pipe(ialu_reg_reg_alu0);
16409 %}
16410
16411 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16412 %{
16413 match(Set cr (OverflowMulI op1 op2));
16414 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16415
16416 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16417 ins_encode %{
16418 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16419 %}
16420 ins_pipe(ialu_reg_reg_alu0);
16421 %}
16422
16423 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16424 %{
16425 match(Set cr (OverflowMulL op1 op2));
16426 effect(DEF cr, USE_KILL op1, USE op2);
16427
16428 format %{ "imulq $op1, $op2\t# overflow check long" %}
16429 ins_encode %{
16430 __ imulq($op1$$Register, $op2$$Register);
16431 %}
16432 ins_pipe(ialu_reg_reg_alu0);
16433 %}
16434
16435 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16436 %{
16437 match(Set cr (OverflowMulL op1 op2));
16438 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16439
16440 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16441 ins_encode %{
16442 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16443 %}
16444 ins_pipe(ialu_reg_reg_alu0);
16445 %}
16446
16447
16448 //----------Control Flow Instructions------------------------------------------
16449 // Signed compare Instructions
16450
16451 // XXX more variants!!
16452 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16453 %{
16454 match(Set cr (CmpI op1 op2));
16455 effect(DEF cr, USE op1, USE op2);
16456
16457 format %{ "cmpl $op1, $op2" %}
16458 ins_encode %{
16459 __ cmpl($op1$$Register, $op2$$Register);
16460 %}
16461 ins_pipe(ialu_cr_reg_reg);
16462 %}
16463
16464 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16465 %{
16466 match(Set cr (CmpI op1 op2));
16467
16468 format %{ "cmpl $op1, $op2" %}
16469 ins_encode %{
16470 __ cmpl($op1$$Register, $op2$$constant);
16471 %}
16472 ins_pipe(ialu_cr_reg_imm);
16473 %}
16474
16475 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16476 %{
16477 match(Set cr (CmpI op1 (LoadI op2)));
16478
16479 ins_cost(500); // XXX
16480 format %{ "cmpl $op1, $op2" %}
16481 ins_encode %{
16482 __ cmpl($op1$$Register, $op2$$Address);
16483 %}
16484 ins_pipe(ialu_cr_reg_mem);
16485 %}
16486
16487 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16488 %{
16489 match(Set cr (CmpI src zero));
16490
16491 format %{ "testl $src, $src" %}
16492 ins_encode %{
16493 __ testl($src$$Register, $src$$Register);
16494 %}
16495 ins_pipe(ialu_cr_reg_imm);
16496 %}
16497
16498 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16499 %{
16500 match(Set cr (CmpI (AndI src con) zero));
16501
16502 format %{ "testl $src, $con" %}
16503 ins_encode %{
16504 __ testl($src$$Register, $con$$constant);
16505 %}
16506 ins_pipe(ialu_cr_reg_imm);
16507 %}
16508
16509 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16510 %{
16511 match(Set cr (CmpI (AndI src1 src2) zero));
16512
16513 format %{ "testl $src1, $src2" %}
16514 ins_encode %{
16515 __ testl($src1$$Register, $src2$$Register);
16516 %}
16517 ins_pipe(ialu_cr_reg_imm);
16518 %}
16519
16520 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16521 %{
16522 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16523
16524 format %{ "testl $src, $mem" %}
16525 ins_encode %{
16526 __ testl($src$$Register, $mem$$Address);
16527 %}
16528 ins_pipe(ialu_cr_reg_mem);
16529 %}
16530
16531 // Unsigned compare Instructions; really, same as signed except they
16532 // produce an rFlagsRegU instead of rFlagsReg.
16533 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16534 %{
16535 match(Set cr (CmpU op1 op2));
16536
16537 format %{ "cmpl $op1, $op2\t# unsigned" %}
16538 ins_encode %{
16539 __ cmpl($op1$$Register, $op2$$Register);
16540 %}
16541 ins_pipe(ialu_cr_reg_reg);
16542 %}
16543
16544 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16545 %{
16546 match(Set cr (CmpU op1 op2));
16547
16548 format %{ "cmpl $op1, $op2\t# unsigned" %}
16549 ins_encode %{
16550 __ cmpl($op1$$Register, $op2$$constant);
16551 %}
16552 ins_pipe(ialu_cr_reg_imm);
16553 %}
16554
16555 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16556 %{
16557 match(Set cr (CmpU op1 (LoadI op2)));
16558
16559 ins_cost(500); // XXX
16560 format %{ "cmpl $op1, $op2\t# unsigned" %}
16561 ins_encode %{
16562 __ cmpl($op1$$Register, $op2$$Address);
16563 %}
16564 ins_pipe(ialu_cr_reg_mem);
16565 %}
16566
16567 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16568 %{
16569 match(Set cr (CmpU src zero));
16570
16571 format %{ "testl $src, $src\t# unsigned" %}
16572 ins_encode %{
16573 __ testl($src$$Register, $src$$Register);
16574 %}
16575 ins_pipe(ialu_cr_reg_imm);
16576 %}
16577
16578 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16579 %{
16580 match(Set cr (CmpP op1 op2));
16581
16582 format %{ "cmpq $op1, $op2\t# ptr" %}
16583 ins_encode %{
16584 __ cmpq($op1$$Register, $op2$$Register);
16585 %}
16586 ins_pipe(ialu_cr_reg_reg);
16587 %}
16588
16589 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16590 %{
16591 match(Set cr (CmpP op1 (LoadP op2)));
16592 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16593
16594 ins_cost(500); // XXX
16595 format %{ "cmpq $op1, $op2\t# ptr" %}
16596 ins_encode %{
16597 __ cmpq($op1$$Register, $op2$$Address);
16598 %}
16599 ins_pipe(ialu_cr_reg_mem);
16600 %}
16601
16602 // XXX this is generalized by compP_rReg_mem???
16603 // Compare raw pointer (used in out-of-heap check).
16604 // Only works because non-oop pointers must be raw pointers
16605 // and raw pointers have no anti-dependencies.
16606 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16607 %{
16608 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16609 n->in(2)->as_Load()->barrier_data() == 0);
16610 match(Set cr (CmpP op1 (LoadP op2)));
16611
16612 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16613 ins_encode %{
16614 __ cmpq($op1$$Register, $op2$$Address);
16615 %}
16616 ins_pipe(ialu_cr_reg_mem);
16617 %}
16618
16619 // This will generate a signed flags result. This should be OK since
16620 // any compare to a zero should be eq/neq.
16621 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16622 %{
16623 match(Set cr (CmpP src zero));
16624
16625 format %{ "testq $src, $src\t# ptr" %}
16626 ins_encode %{
16627 __ testq($src$$Register, $src$$Register);
16628 %}
16629 ins_pipe(ialu_cr_reg_imm);
16630 %}
16631
16632 // This will generate a signed flags result. This should be OK since
16633 // any compare to a zero should be eq/neq.
16634 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16635 %{
16636 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16637 n->in(1)->as_Load()->barrier_data() == 0);
16638 match(Set cr (CmpP (LoadP op) zero));
16639
16640 ins_cost(500); // XXX
16641 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16642 ins_encode %{
16643 __ testq($op$$Address, 0xFFFFFFFF);
16644 %}
16645 ins_pipe(ialu_cr_reg_imm);
16646 %}
16647
16648 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16649 %{
16650 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16651 n->in(1)->as_Load()->barrier_data() == 0);
16652 match(Set cr (CmpP (LoadP mem) zero));
16653
16654 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16655 ins_encode %{
16656 __ cmpq(r12, $mem$$Address);
16657 %}
16658 ins_pipe(ialu_cr_reg_mem);
16659 %}
16660
16661 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16662 %{
16663 match(Set cr (CmpN op1 op2));
16664
16665 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16666 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16667 ins_pipe(ialu_cr_reg_reg);
16668 %}
16669
16670 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16671 %{
16672 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16673 match(Set cr (CmpN src (LoadN mem)));
16674
16675 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16676 ins_encode %{
16677 __ cmpl($src$$Register, $mem$$Address);
16678 %}
16679 ins_pipe(ialu_cr_reg_mem);
16680 %}
16681
16682 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16683 match(Set cr (CmpN op1 op2));
16684
16685 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16686 ins_encode %{
16687 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16688 %}
16689 ins_pipe(ialu_cr_reg_imm);
16690 %}
16691
16692 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16693 %{
16694 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16695 match(Set cr (CmpN src (LoadN mem)));
16696
16697 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16698 ins_encode %{
16699 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16700 %}
16701 ins_pipe(ialu_cr_reg_mem);
16702 %}
16703
16704 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16705 match(Set cr (CmpN op1 op2));
16706
16707 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16708 ins_encode %{
16709 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16710 %}
16711 ins_pipe(ialu_cr_reg_imm);
16712 %}
16713
16714 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16715 %{
16716 predicate(!UseCompactObjectHeaders);
16717 match(Set cr (CmpN src (LoadNKlass mem)));
16718
16719 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16720 ins_encode %{
16721 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16722 %}
16723 ins_pipe(ialu_cr_reg_mem);
16724 %}
16725
16726 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16727 match(Set cr (CmpN src zero));
16728
16729 format %{ "testl $src, $src\t# compressed ptr" %}
16730 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16731 ins_pipe(ialu_cr_reg_imm);
16732 %}
16733
16734 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16735 %{
16736 predicate(CompressedOops::base() != nullptr &&
16737 n->in(1)->as_Load()->barrier_data() == 0);
16738 match(Set cr (CmpN (LoadN mem) zero));
16739
16740 ins_cost(500); // XXX
16741 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16742 ins_encode %{
16743 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16744 %}
16745 ins_pipe(ialu_cr_reg_mem);
16746 %}
16747
16748 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16749 %{
16750 predicate(CompressedOops::base() == nullptr &&
16751 n->in(1)->as_Load()->barrier_data() == 0);
16752 match(Set cr (CmpN (LoadN mem) zero));
16753
16754 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16755 ins_encode %{
16756 __ cmpl(r12, $mem$$Address);
16757 %}
16758 ins_pipe(ialu_cr_reg_mem);
16759 %}
16760
16761 // Yanked all unsigned pointer compare operations.
16762 // Pointer compares are done with CmpP which is already unsigned.
16763
16764 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16765 %{
16766 match(Set cr (CmpL op1 op2));
16767
16768 format %{ "cmpq $op1, $op2" %}
16769 ins_encode %{
16770 __ cmpq($op1$$Register, $op2$$Register);
16771 %}
16772 ins_pipe(ialu_cr_reg_reg);
16773 %}
16774
16775 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16776 %{
16777 match(Set cr (CmpL op1 op2));
16778
16779 format %{ "cmpq $op1, $op2" %}
16780 ins_encode %{
16781 __ cmpq($op1$$Register, $op2$$constant);
16782 %}
16783 ins_pipe(ialu_cr_reg_imm);
16784 %}
16785
16786 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16787 %{
16788 match(Set cr (CmpL op1 (LoadL op2)));
16789
16790 format %{ "cmpq $op1, $op2" %}
16791 ins_encode %{
16792 __ cmpq($op1$$Register, $op2$$Address);
16793 %}
16794 ins_pipe(ialu_cr_reg_mem);
16795 %}
16796
16797 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16798 %{
16799 match(Set cr (CmpL src zero));
16800
16801 format %{ "testq $src, $src" %}
16802 ins_encode %{
16803 __ testq($src$$Register, $src$$Register);
16804 %}
16805 ins_pipe(ialu_cr_reg_imm);
16806 %}
16807
16808 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16809 %{
16810 match(Set cr (CmpL (AndL src con) zero));
16811
16812 format %{ "testq $src, $con\t# long" %}
16813 ins_encode %{
16814 __ testq($src$$Register, $con$$constant);
16815 %}
16816 ins_pipe(ialu_cr_reg_imm);
16817 %}
16818
16819 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16820 %{
16821 match(Set cr (CmpL (AndL src1 src2) zero));
16822
16823 format %{ "testq $src1, $src2\t# long" %}
16824 ins_encode %{
16825 __ testq($src1$$Register, $src2$$Register);
16826 %}
16827 ins_pipe(ialu_cr_reg_imm);
16828 %}
16829
16830 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16831 %{
16832 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16833
16834 format %{ "testq $src, $mem" %}
16835 ins_encode %{
16836 __ testq($src$$Register, $mem$$Address);
16837 %}
16838 ins_pipe(ialu_cr_reg_mem);
16839 %}
16840
16841 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16842 %{
16843 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16844
16845 format %{ "testq $src, $mem" %}
16846 ins_encode %{
16847 __ testq($src$$Register, $mem$$Address);
16848 %}
16849 ins_pipe(ialu_cr_reg_mem);
16850 %}
16851
16852 // Manifest a CmpU result in an integer register. Very painful.
16853 // This is the test to avoid.
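// The three-way compares below materialize -1/0/+1: dst is preloaded with -1,
// the early branch keeps it when src1 is below (or less than) src2, and the
// trailing setcc(ne) yields 0 on equality and 1 otherwise.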
16854 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16855 %{
16856 match(Set dst (CmpU3 src1 src2));
16857 effect(KILL flags);
16858
16859 ins_cost(275); // XXX
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setcc   $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16865 ins_encode %{
16866 Label done;
16867 __ cmpl($src1$$Register, $src2$$Register);
16868 __ movl($dst$$Register, -1);
16869 __ jccb(Assembler::below, done);
16870 __ setcc(Assembler::notZero, $dst$$Register);
16871 __ bind(done);
16872 %}
16873 ins_pipe(pipe_slow);
16874 %}
16875
16876 // Manifest a CmpL result in an integer register. Very painful.
16877 // This is the test to avoid.
16878 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16879 %{
16880 match(Set dst (CmpL3 src1 src2));
16881 effect(KILL flags);
16882
16883 ins_cost(275); // XXX
16884 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16885 "movl $dst, -1\n\t"
16886 "jl,s done\n\t"
            "setcc   $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16889 ins_encode %{
16890 Label done;
16891 __ cmpq($src1$$Register, $src2$$Register);
16892 __ movl($dst$$Register, -1);
16893 __ jccb(Assembler::less, done);
16894 __ setcc(Assembler::notZero, $dst$$Register);
16895 __ bind(done);
16896 %}
16897 ins_pipe(pipe_slow);
16898 %}
16899
16900 // Manifest a CmpUL result in an integer register. Very painful.
16901 // This is the test to avoid.
16902 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16903 %{
16904 match(Set dst (CmpUL3 src1 src2));
16905 effect(KILL flags);
16906
16907 ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setcc   $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16913 ins_encode %{
16914 Label done;
16915 __ cmpq($src1$$Register, $src2$$Register);
16916 __ movl($dst$$Register, -1);
16917 __ jccb(Assembler::below, done);
16918 __ setcc(Assembler::notZero, $dst$$Register);
16919 __ bind(done);
16920 %}
16921 ins_pipe(pipe_slow);
16922 %}
16923
16924 // Unsigned long compare Instructions; really, same as signed long except they
16925 // produce an rFlagsRegU instead of rFlagsReg.
16926 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16927 %{
16928 match(Set cr (CmpUL op1 op2));
16929
16930 format %{ "cmpq $op1, $op2\t# unsigned" %}
16931 ins_encode %{
16932 __ cmpq($op1$$Register, $op2$$Register);
16933 %}
16934 ins_pipe(ialu_cr_reg_reg);
16935 %}
16936
16937 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16938 %{
16939 match(Set cr (CmpUL op1 op2));
16940
16941 format %{ "cmpq $op1, $op2\t# unsigned" %}
16942 ins_encode %{
16943 __ cmpq($op1$$Register, $op2$$constant);
16944 %}
16945 ins_pipe(ialu_cr_reg_imm);
16946 %}
16947
16948 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16949 %{
16950 match(Set cr (CmpUL op1 (LoadL op2)));
16951
16952 format %{ "cmpq $op1, $op2\t# unsigned" %}
16953 ins_encode %{
16954 __ cmpq($op1$$Register, $op2$$Address);
16955 %}
16956 ins_pipe(ialu_cr_reg_mem);
16957 %}
16958
16959 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16960 %{
16961 match(Set cr (CmpUL src zero));
16962
16963 format %{ "testq $src, $src\t# unsigned" %}
16964 ins_encode %{
16965 __ testq($src$$Register, $src$$Register);
16966 %}
16967 ins_pipe(ialu_cr_reg_imm);
16968 %}
16969
16970 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16971 %{
16972 match(Set cr (CmpI (LoadB mem) imm));
16973
16974 ins_cost(125);
16975 format %{ "cmpb $mem, $imm" %}
16976 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16977 ins_pipe(ialu_cr_reg_mem);
16978 %}
16979
16980 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16981 %{
16982 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16983
16984 ins_cost(125);
16985 format %{ "testb $mem, $imm\t# ubyte" %}
16986 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16987 ins_pipe(ialu_cr_reg_mem);
16988 %}
16989
16990 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16991 %{
16992 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16993
16994 ins_cost(125);
16995 format %{ "testb $mem, $imm\t# byte" %}
16996 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16997 ins_pipe(ialu_cr_reg_mem);
16998 %}
16999
17000 //----------Max and Min--------------------------------------------------------
17001 // Min Instructions
17002
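// MinI expands to a compare plus cmovgt: src overwrites dst only when
// dst > src, leaving the smaller value. MaxI (below) uses cmovlt symmetrically.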
17003 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
17004 %{
17005 predicate(!UseAPX);
17006 effect(USE_DEF dst, USE src, USE cr);
17007
17008 format %{ "cmovlgt $dst, $src\t# min" %}
17009 ins_encode %{
17010 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
17011 %}
17012 ins_pipe(pipe_cmov_reg);
17013 %}
17014
17015 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17016 %{
17017 predicate(UseAPX);
17018 effect(DEF dst, USE src1, USE src2, USE cr);
17019
17020 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
17021 ins_encode %{
17022 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
17023 %}
17024 ins_pipe(pipe_cmov_reg);
17025 %}
17026
17027 instruct minI_rReg(rRegI dst, rRegI src)
17028 %{
17029 predicate(!UseAPX);
17030 match(Set dst (MinI dst src));
17031
17032 ins_cost(200);
17033 expand %{
17034 rFlagsReg cr;
17035 compI_rReg(cr, dst, src);
17036 cmovI_reg_g(dst, src, cr);
17037 %}
17038 %}
17039
17040 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17041 %{
17042 predicate(UseAPX);
17043 match(Set dst (MinI src1 src2));
17044 effect(DEF dst, USE src1, USE src2);
17045 flag(PD::Flag_ndd_demotable_opr1);
17046
17047 ins_cost(200);
17048 expand %{
17049 rFlagsReg cr;
17050 compI_rReg(cr, src1, src2);
17051 cmovI_reg_g_ndd(dst, src1, src2, cr);
17052 %}
17053 %}
17054
17055 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
17056 %{
17057 predicate(!UseAPX);
17058 effect(USE_DEF dst, USE src, USE cr);
17059
17060 format %{ "cmovllt $dst, $src\t# max" %}
17061 ins_encode %{
17062 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
17063 %}
17064 ins_pipe(pipe_cmov_reg);
17065 %}
17066
17067 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17068 %{
17069 predicate(UseAPX);
17070 effect(DEF dst, USE src1, USE src2, USE cr);
17071
17072 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
17073 ins_encode %{
17074 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
17075 %}
17076 ins_pipe(pipe_cmov_reg);
17077 %}
17078
17079 instruct maxI_rReg(rRegI dst, rRegI src)
17080 %{
17081 predicate(!UseAPX);
17082 match(Set dst (MaxI dst src));
17083
17084 ins_cost(200);
17085 expand %{
17086 rFlagsReg cr;
17087 compI_rReg(cr, dst, src);
17088 cmovI_reg_l(dst, src, cr);
17089 %}
17090 %}
17091
17092 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17093 %{
17094 predicate(UseAPX);
17095 match(Set dst (MaxI src1 src2));
17096 effect(DEF dst, USE src1, USE src2);
17097 flag(PD::Flag_ndd_demotable_opr1);
17098
17099 ins_cost(200);
17100 expand %{
17101 rFlagsReg cr;
17102 compI_rReg(cr, src1, src2);
17103 cmovI_reg_l_ndd(dst, src1, src2, cr);
17104 %}
17105 %}
17106
17107 // ============================================================================
17108 // Branch Instructions
17109
17110 // Jump Direct - Label defines a relative address from JMP+1
17111 instruct jmpDir(label labl)
17112 %{
17113 match(Goto);
17114 effect(USE labl);
17115
17116 ins_cost(300);
17117 format %{ "jmp $labl" %}
17118 size(5);
17119 ins_encode %{
17120 Label* L = $labl$$label;
17121 __ jmp(*L, false); // Always long jump
17122 %}
17123 ins_pipe(pipe_jmp);
17124 %}
17125
17126 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17127 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17128 %{
17129 match(If cop cr);
17130 effect(USE labl);
17131
17132 ins_cost(300);
17133 format %{ "j$cop $labl" %}
17134 size(6);
17135 ins_encode %{
17136 Label* L = $labl$$label;
17137 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17138 %}
17139 ins_pipe(pipe_jcc);
17140 %}
17141
17142 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17143 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17144 %{
17145 match(CountedLoopEnd cop cr);
17146 effect(USE labl);
17147
17148 ins_cost(300);
17149 format %{ "j$cop $labl\t# loop end" %}
17150 size(6);
17151 ins_encode %{
17152 Label* L = $labl$$label;
17153 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17154 %}
17155 ins_pipe(pipe_jcc);
17156 %}
17157
17158 // Jump Direct Conditional - using unsigned comparison
17159 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17160 match(If cop cmp);
17161 effect(USE labl);
17162
17163 ins_cost(300);
17164 format %{ "j$cop,u $labl" %}
17165 size(6);
17166 ins_encode %{
17167 Label* L = $labl$$label;
17168 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17169 %}
17170 ins_pipe(pipe_jcc);
17171 %}
17172
17173 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17174 match(If cop cmp);
17175 effect(USE labl);
17176
17177 ins_cost(200);
17178 format %{ "j$cop,u $labl" %}
17179 size(6);
17180 ins_encode %{
17181 Label* L = $labl$$label;
17182 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17183 %}
17184 ins_pipe(pipe_jcc);
17185 %}
17186
17187 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17188 match(If cop cmp);
17189 effect(USE labl);
17190
17191 ins_cost(200);
17192 format %{ $$template
17193 if ($cop$$cmpcode == Assembler::notEqual) {
17194 $$emit$$"jp,u $labl\n\t"
17195 $$emit$$"j$cop,u $labl"
17196 } else {
17197 $$emit$$"jp,u done\n\t"
17198 $$emit$$"j$cop,u $labl\n\t"
17199 $$emit$$"done:"
17200 }
17201 %}
17202 ins_encode %{
17203 Label* l = $labl$$label;
17204 if ($cop$$cmpcode == Assembler::notEqual) {
17205 __ jcc(Assembler::parity, *l, false);
17206 __ jcc(Assembler::notEqual, *l, false);
17207 } else if ($cop$$cmpcode == Assembler::equal) {
17208 Label done;
17209 __ jccb(Assembler::parity, done);
17210 __ jcc(Assembler::equal, *l, false);
17211 __ bind(done);
17212 } else {
17213 ShouldNotReachHere();
17214 }
17215 %}
17216 ins_pipe(pipe_jcc);
17217 %}
17218
17219 // Jump Direct Conditional - using signed and unsigned comparison
17220 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17221 match(If cop cmp);
17222 effect(USE labl);
17223
17224 ins_cost(200);
17225 format %{ "j$cop,su $labl" %}
17226 size(6);
17227 ins_encode %{
17228 Label* L = $labl$$label;
17229 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17230 %}
17231 ins_pipe(pipe_jcc);
17232 %}
17233
17234 // ============================================================================
17235 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
17236 // superklass array for an instance of the superklass. Set a hidden
17237 // internal cache on a hit (cache is checked with exposed code in
17238 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
17239 // encoding ALSO sets flags.
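//
// Roughly, the encoding below behaves like the following sketch (not authoritative; see
// MacroAssembler::check_klass_subtype_slow_path_linear for the real thing):
//
//   Array<Klass*>* ss = sub->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {         // repne scasq scan
//     if (ss->at(i) == super) {
//       sub->secondary_super_cache = super;           // hit: update the cache
//       return 0;                                     // result zero, flags Z
//     }
//   }
//   return non-zero;                                  // miss, flags NZ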
17240
17241 instruct partialSubtypeCheck(rdi_RegP result,
17242 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17243 rFlagsReg cr)
17244 %{
17245 match(Set result (PartialSubtypeCheck sub super));
17246 predicate(!UseSecondarySupersTable);
17247 effect(KILL rcx, KILL cr);
17248
17249 ins_cost(1100); // slightly larger than the next version
17250 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17251 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17252 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17253 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17254 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
17255 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t# Hit: rdi zero\n\t"
17257 "miss:\t" %}
17258
17259 ins_encode %{
17260 Label miss;
17261 // NB: Callers may assume that, when $result is a valid register,
17262 // check_klass_subtype_slow_path_linear sets it to a nonzero
17263 // value.
17264 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17265 $rcx$$Register, $result$$Register,
17266 nullptr, &miss,
17267 /*set_cond_codes:*/ true);
17268 __ xorptr($result$$Register, $result$$Register);
17269 __ bind(miss);
17270 %}
17271
17272 ins_pipe(pipe_slow);
17273 %}
17274
17275 // ============================================================================
17276 // Two versions of hashtable-based partialSubtypeCheck, both used when
17277 // we need to search for a super class in the secondary supers array.
17278 // The first is used when we don't know _a priori_ the class being
17279 // searched for. The second, far more common, is used when we do know:
17280 // this is used for instanceof, checkcast, and any case where C2 can
17281 // determine it by constant propagation.
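//
// In the constant-super case the superclass's hash slot is known at compile time (see
// super_klass_slot below), so the table probe can be inlined or routed through a small
// per-slot stub; the variable-super case has to compute the slot at run time, which is why it
// needs the extra temporaries and carries a higher cost.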
17282
17283 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17284 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17285 rFlagsReg cr)
17286 %{
17287 match(Set result (PartialSubtypeCheck sub super));
17288 predicate(UseSecondarySupersTable);
17289 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17290
17291 ins_cost(1000);
17292 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17293
17294 ins_encode %{
17295 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17296 $temp3$$Register, $temp4$$Register, $result$$Register);
17297 %}
17298
17299 ins_pipe(pipe_slow);
17300 %}
17301
17302 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17303 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17304 rFlagsReg cr)
17305 %{
17306 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17307 predicate(UseSecondarySupersTable);
17308 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17309
17310 ins_cost(700); // smaller than the next version
17311 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17312
17313 ins_encode %{
17314 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17315 if (InlineSecondarySupersTest) {
17316 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17317 $temp3$$Register, $temp4$$Register, $result$$Register,
17318 super_klass_slot);
17319 } else {
17320 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17321 }
17322 %}
17323
17324 ins_pipe(pipe_slow);
17325 %}
17326
17327 // ============================================================================
17328 // Branch Instructions -- short offset versions
17329 //
17330 // These instructions are used to replace jumps of a long offset (the default
17331 // match) with jumps of a shorter offset. These instructions are all tagged
17332 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17333 // match rules in general matching. Instead, the ADLC generates a conversion
17334 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler decides whether the short form can be
// used, i.e. whether the branch displacement fits in a signed 8-bit offset, via the
// is_short_branch_offset() predicate in the machine-specific code section of the file.
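//
// For reference, the size() attributes reflect the x86-64 encodings: a long "jmp rel32" is
// 5 bytes (E9 + imm32) and a long "jcc rel32" is 6 bytes (0F 8x + imm32), while the short forms
// below are 2 bytes each ("jmp rel8" = EB + imm8, "jcc rel8" = 7x + imm8). A short branch is
// therefore only usable when the displacement fits in a signed 8-bit offset.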
17338
17339 // Jump Direct - Label defines a relative address from JMP+1
17340 instruct jmpDir_short(label labl) %{
17341 match(Goto);
17342 effect(USE labl);
17343
17344 ins_cost(300);
17345 format %{ "jmp,s $labl" %}
17346 size(2);
17347 ins_encode %{
17348 Label* L = $labl$$label;
17349 __ jmpb(*L);
17350 %}
17351 ins_pipe(pipe_jmp);
17352 ins_short_branch(1);
17353 %}
17354
17355 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17356 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17357 match(If cop cr);
17358 effect(USE labl);
17359
17360 ins_cost(300);
17361 format %{ "j$cop,s $labl" %}
17362 size(2);
17363 ins_encode %{
17364 Label* L = $labl$$label;
17365 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17366 %}
17367 ins_pipe(pipe_jcc);
17368 ins_short_branch(1);
17369 %}
17370
17371 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17372 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17373 match(CountedLoopEnd cop cr);
17374 effect(USE labl);
17375
17376 ins_cost(300);
17377 format %{ "j$cop,s $labl\t# loop end" %}
17378 size(2);
17379 ins_encode %{
17380 Label* L = $labl$$label;
17381 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17382 %}
17383 ins_pipe(pipe_jcc);
17384 ins_short_branch(1);
17385 %}
17386
17387 // Jump Direct Conditional - using unsigned comparison
17388 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17389 match(If cop cmp);
17390 effect(USE labl);
17391
17392 ins_cost(300);
17393 format %{ "j$cop,us $labl" %}
17394 size(2);
17395 ins_encode %{
17396 Label* L = $labl$$label;
17397 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17398 %}
17399 ins_pipe(pipe_jcc);
17400 ins_short_branch(1);
17401 %}
17402
17403 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17404 match(If cop cmp);
17405 effect(USE labl);
17406
17407 ins_cost(300);
17408 format %{ "j$cop,us $labl" %}
17409 size(2);
17410 ins_encode %{
17411 Label* L = $labl$$label;
17412 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17413 %}
17414 ins_pipe(pipe_jcc);
17415 ins_short_branch(1);
17416 %}
17417
17418 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17419 match(If cop cmp);
17420 effect(USE labl);
17421
17422 ins_cost(300);
17423 format %{ $$template
17424 if ($cop$$cmpcode == Assembler::notEqual) {
17425 $$emit$$"jp,u,s $labl\n\t"
17426 $$emit$$"j$cop,u,s $labl"
17427 } else {
17428 $$emit$$"jp,u,s done\n\t"
17429 $$emit$$"j$cop,u,s $labl\n\t"
17430 $$emit$$"done:"
17431 }
17432 %}
17433 size(4);
17434 ins_encode %{
17435 Label* l = $labl$$label;
17436 if ($cop$$cmpcode == Assembler::notEqual) {
17437 __ jccb(Assembler::parity, *l);
17438 __ jccb(Assembler::notEqual, *l);
17439 } else if ($cop$$cmpcode == Assembler::equal) {
17440 Label done;
17441 __ jccb(Assembler::parity, done);
17442 __ jccb(Assembler::equal, *l);
17443 __ bind(done);
17444 } else {
17445 ShouldNotReachHere();
17446 }
17447 %}
17448 ins_pipe(pipe_jcc);
17449 ins_short_branch(1);
17450 %}
17451
17452 // Jump Direct Conditional - using signed and unsigned comparison
17453 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17454 match(If cop cmp);
17455 effect(USE labl);
17456
17457 ins_cost(300);
17458 format %{ "j$cop,sus $labl" %}
17459 size(2);
17460 ins_encode %{
17461 Label* L = $labl$$label;
17462 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17463 %}
17464 ins_pipe(pipe_jcc);
17465 ins_short_branch(1);
17466 %}
17467
17468 // ============================================================================
17469 // inlined locking and unlocking
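//
// Rough picture: FastLock/FastUnlock are the inlined fast paths of monitorenter/monitorexit.
// They produce only flags: the fast path leaves ZF set on success, and the generated code
// branches to the runtime slow path when the flags say "not equal". The box register points at
// the on-stack lock record; its exact use depends on the locking mode.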
17470
17471 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17472 match(Set cr (FastLock object box));
17473 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17474 ins_cost(300);
17475 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17476 ins_encode %{
17477 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17478 %}
17479 ins_pipe(pipe_slow);
17480 %}
17481
17482 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17483 match(Set cr (FastUnlock object rax_reg));
17484 effect(TEMP tmp, USE_KILL rax_reg);
17485 ins_cost(300);
17486 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17487 ins_encode %{
17488 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17489 %}
17490 ins_pipe(pipe_slow);
17491 %}
17492
17493
17494 // ============================================================================
17495 // Safepoint Instructions
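//
// The $poll register holds the thread-local polling word address; the testl below is an
// ordinary load that normally succeeds. When a safepoint or handshake is requested, the VM
// arms the polling page so the load faults and the signal handler brings this thread to the
// safepoint. The relocInfo::poll_type relocation and the assert in the encoding make the poll
// site recognizable to the VM.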
17496 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17497 %{
17498 match(SafePoint poll);
17499 effect(KILL cr, USE poll);
17500
17501 format %{ "testl rax, [$poll]\t"
17502 "# Safepoint: poll for GC" %}
17503 ins_cost(125);
17504 ins_encode %{
17505 __ relocate(relocInfo::poll_type);
17506 address pre_pc = __ pc();
17507 __ testl(rax, Address($poll$$Register, 0));
17508 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17509 %}
17510 ins_pipe(ialu_reg_mem);
17511 %}
17512
17513 instruct mask_all_evexL(kReg dst, rRegL src) %{
17514 match(Set dst (MaskAll src));
17515 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17516 ins_encode %{
17517 int mask_len = Matcher::vector_length(this);
17518 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17519 %}
17520 ins_pipe( pipe_slow );
17521 %}
17522
17523 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17524 predicate(Matcher::vector_length(n) > 32);
17525 match(Set dst (MaskAll src));
17526 effect(TEMP tmp);
17527 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17528 ins_encode %{
17529 int mask_len = Matcher::vector_length(this);
17530 __ movslq($tmp$$Register, $src$$Register);
17531 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17532 %}
17533 ins_pipe( pipe_slow );
17534 %}
17535
17536 // ============================================================================
17537 // Procedure Call/Return Instructions
17538 // Call Java Static Instruction
17539 // Note: If this code changes, the corresponding ret_addr_offset() and
17540 // compute_padding() functions will have to be adjusted.
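// (ret_addr_offset() tells the runtime where the return address lies relative to the start of
// this instruction, and compute_padding()/ins_alignment(4) pad the call so its patchable 32-bit
// displacement stays suitably aligned; presumably this keeps call-site patching atomic.)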
17541 instruct CallStaticJavaDirect(method meth) %{
17542 match(CallStaticJava);
17543 effect(USE meth);
17544
17545 ins_cost(300);
17546 format %{ "call,static " %}
17547 opcode(0xE8); /* E8 cd */
17548 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17549 ins_pipe(pipe_slow);
17550 ins_alignment(4);
17551 %}
17552
17553 // Call Java Dynamic Instruction
17554 // Note: If this code changes, the corresponding ret_addr_offset() and
17555 // compute_padding() functions will have to be adjusted.
17556 instruct CallDynamicJavaDirect(method meth)
17557 %{
17558 match(CallDynamicJava);
17559 effect(USE meth);
17560
17561 ins_cost(300);
17562 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17563 "call,dynamic " %}
17564 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17565 ins_pipe(pipe_slow);
17566 ins_alignment(4);
17567 %}
17568
17569 // Call Runtime Instruction
17570 instruct CallRuntimeDirect(method meth)
17571 %{
17572 match(CallRuntime);
17573 effect(USE meth);
17574
17575 ins_cost(300);
17576 format %{ "call,runtime " %}
17577 ins_encode(clear_avx, Java_To_Runtime(meth));
17578 ins_pipe(pipe_slow);
17579 %}
17580
17581 // Call runtime without safepoint
17582 instruct CallLeafDirect(method meth)
17583 %{
17584 match(CallLeaf);
17585 effect(USE meth);
17586
17587 ins_cost(300);
17588 format %{ "call_leaf,runtime " %}
17589 ins_encode(clear_avx, Java_To_Runtime(meth));
17590 ins_pipe(pipe_slow);
17591 %}
17592
17593 // Call runtime without safepoint and with vector arguments
17594 instruct CallLeafDirectVector(method meth)
17595 %{
17596 match(CallLeafVector);
17597 effect(USE meth);
17598
17599 ins_cost(300);
17600 format %{ "call_leaf,vector " %}
17601 ins_encode(Java_To_Runtime(meth));
17602 ins_pipe(pipe_slow);
17603 %}
17604
17605 // Call runtime without safepoint
17606 // entry point is null, target holds the address to call
17607 instruct CallLeafNoFPInDirect(rRegP target)
17608 %{
17609 predicate(n->as_Call()->entry_point() == nullptr);
17610 match(CallLeafNoFP target);
17611
17612 ins_cost(300);
17613 format %{ "call_leaf_nofp,runtime indirect " %}
17614 ins_encode %{
17615 __ call($target$$Register);
17616 %}
17617
17618 ins_pipe(pipe_slow);
17619 %}
17620
17621 // Call runtime without safepoint
17622 instruct CallLeafNoFPDirect(method meth)
17623 %{
17624 predicate(n->as_Call()->entry_point() != nullptr);
17625 match(CallLeafNoFP);
17626 effect(USE meth);
17627
17628 ins_cost(300);
17629 format %{ "call_leaf_nofp,runtime " %}
17630 ins_encode(clear_avx, Java_To_Runtime(meth));
17631 ins_pipe(pipe_slow);
17632 %}
17633
17634 // Return Instruction
17635 // Remove the return address & jump to it.
17636 // Notice: We always emit a nop after a ret to make sure there is room
17637 // for safepoint patching
17638 instruct Ret()
17639 %{
17640 match(Return);
17641
17642 format %{ "ret" %}
17643 ins_encode %{
17644 __ ret(0);
17645 %}
17646 ins_pipe(pipe_jmp);
17647 %}
17648
17649 // Tail Call; Jump from runtime stub to Java code.
17650 // Also known as an 'interprocedural jump'.
17651 // Target of jump will eventually return to caller.
17652 // TailJump below removes the return address.
17653 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17654 // emitted just above the TailCall which has reset rbp to the caller state.
17655 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17656 %{
17657 match(TailCall jump_target method_ptr);
17658
17659 ins_cost(300);
17660 format %{ "jmp $jump_target\t# rbx holds method" %}
17661 ins_encode %{
17662 __ jmp($jump_target$$Register);
17663 %}
17664 ins_pipe(pipe_jmp);
17665 %}
17666
17667 // Tail Jump; remove the return address; jump to target.
17668 // TailCall above leaves the return address around.
17669 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17670 %{
17671 match(TailJump jump_target ex_oop);
17672
17673 ins_cost(300);
17674 format %{ "popq rdx\t# pop return address\n\t"
17675 "jmp $jump_target" %}
17676 ins_encode %{
17677 __ popq(as_Register(RDX_enc));
17678 __ jmp($jump_target$$Register);
17679 %}
17680 ins_pipe(pipe_jmp);
17681 %}
17682
17683 // Forward exception.
17684 instruct ForwardExceptionjmp()
17685 %{
17686 match(ForwardException);
17687
17688 format %{ "jmp forward_exception_stub" %}
17689 ins_encode %{
17690 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17691 %}
17692 ins_pipe(pipe_jmp);
17693 %}
17694
17695 // Create exception oop: created by stack-crawling runtime code.
17696 // Created exception is now available to this handler, and is setup
17697 // just prior to jumping to this handler. No code emitted.
17698 instruct CreateException(rax_RegP ex_oop)
17699 %{
17700 match(Set ex_oop (CreateEx));
17701
17702 size(0);
17703 // use the following format syntax
17704 format %{ "# exception oop is in rax; no code emitted" %}
17705 ins_encode();
17706 ins_pipe(empty);
17707 %}
17708
17709 // Rethrow exception:
17710 // The exception oop will come in the first argument position.
17711 // Then JUMP (not call) to the rethrow stub code.
17712 instruct RethrowException()
17713 %{
17714 match(Rethrow);
17715
17716 // use the following format syntax
17717 format %{ "jmp rethrow_stub" %}
17718 ins_encode %{
17719 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17720 %}
17721 ins_pipe(pipe_jmp);
17722 %}
17723
17724 // ============================================================================
17725 // This name is KNOWN by the ADLC and cannot be changed.
17726 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17727 // for this guy.
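//
// On x86-64, compiled code keeps the current JavaThread* permanently in r15 (r15_thread), so
// reading the thread-local pointer needs no instructions at all: the rule simply types r15 as
// the result, which is why size(0) and the empty encoding are correct here.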
17728 instruct tlsLoadP(r15_RegP dst) %{
17729 match(Set dst (ThreadLocal));
17730 effect(DEF dst);
17731
17732 size(0);
17733 format %{ "# TLS is in R15" %}
17734 ins_encode( /*empty encoding*/ );
17735 ins_pipe(ialu_reg_reg);
17736 %}
17737
17738 instruct addF_reg(regF dst, regF src) %{
17739 predicate(UseAVX == 0);
17740 match(Set dst (AddF dst src));
17741
17742 format %{ "addss $dst, $src" %}
17743 ins_cost(150);
17744 ins_encode %{
17745 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17746 %}
17747 ins_pipe(pipe_slow);
17748 %}
17749
17750 instruct addF_mem(regF dst, memory src) %{
17751 predicate(UseAVX == 0);
17752 match(Set dst (AddF dst (LoadF src)));
17753
17754 format %{ "addss $dst, $src" %}
17755 ins_cost(150);
17756 ins_encode %{
17757 __ addss($dst$$XMMRegister, $src$$Address);
17758 %}
17759 ins_pipe(pipe_slow);
17760 %}
17761
17762 instruct addF_imm(regF dst, immF con) %{
17763 predicate(UseAVX == 0);
17764 match(Set dst (AddF dst con));
17765 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17766 ins_cost(150);
17767 ins_encode %{
17768 __ addss($dst$$XMMRegister, $constantaddress($con));
17769 %}
17770 ins_pipe(pipe_slow);
17771 %}
17772
17773 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17774 predicate(UseAVX > 0);
17775 match(Set dst (AddF src1 src2));
17776
17777 format %{ "vaddss $dst, $src1, $src2" %}
17778 ins_cost(150);
17779 ins_encode %{
17780 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17781 %}
17782 ins_pipe(pipe_slow);
17783 %}
17784
17785 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17786 predicate(UseAVX > 0);
17787 match(Set dst (AddF src1 (LoadF src2)));
17788
17789 format %{ "vaddss $dst, $src1, $src2" %}
17790 ins_cost(150);
17791 ins_encode %{
17792 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17793 %}
17794 ins_pipe(pipe_slow);
17795 %}
17796
17797 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17798 predicate(UseAVX > 0);
17799 match(Set dst (AddF src con));
17800
17801 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17802 ins_cost(150);
17803 ins_encode %{
17804 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17805 %}
17806 ins_pipe(pipe_slow);
17807 %}
17808
17809 instruct addD_reg(regD dst, regD src) %{
17810 predicate(UseAVX == 0);
17811 match(Set dst (AddD dst src));
17812
17813 format %{ "addsd $dst, $src" %}
17814 ins_cost(150);
17815 ins_encode %{
17816 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17817 %}
17818 ins_pipe(pipe_slow);
17819 %}
17820
17821 instruct addD_mem(regD dst, memory src) %{
17822 predicate(UseAVX == 0);
17823 match(Set dst (AddD dst (LoadD src)));
17824
17825 format %{ "addsd $dst, $src" %}
17826 ins_cost(150);
17827 ins_encode %{
17828 __ addsd($dst$$XMMRegister, $src$$Address);
17829 %}
17830 ins_pipe(pipe_slow);
17831 %}
17832
17833 instruct addD_imm(regD dst, immD con) %{
17834 predicate(UseAVX == 0);
17835 match(Set dst (AddD dst con));
17836 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17837 ins_cost(150);
17838 ins_encode %{
17839 __ addsd($dst$$XMMRegister, $constantaddress($con));
17840 %}
17841 ins_pipe(pipe_slow);
17842 %}
17843
17844 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17845 predicate(UseAVX > 0);
17846 match(Set dst (AddD src1 src2));
17847
17848 format %{ "vaddsd $dst, $src1, $src2" %}
17849 ins_cost(150);
17850 ins_encode %{
17851 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17852 %}
17853 ins_pipe(pipe_slow);
17854 %}
17855
17856 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17857 predicate(UseAVX > 0);
17858 match(Set dst (AddD src1 (LoadD src2)));
17859
17860 format %{ "vaddsd $dst, $src1, $src2" %}
17861 ins_cost(150);
17862 ins_encode %{
17863 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17864 %}
17865 ins_pipe(pipe_slow);
17866 %}
17867
17868 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17869 predicate(UseAVX > 0);
17870 match(Set dst (AddD src con));
17871
17872 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17873 ins_cost(150);
17874 ins_encode %{
17875 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17876 %}
17877 ins_pipe(pipe_slow);
17878 %}
17879
17880 instruct subF_reg(regF dst, regF src) %{
17881 predicate(UseAVX == 0);
17882 match(Set dst (SubF dst src));
17883
17884 format %{ "subss $dst, $src" %}
17885 ins_cost(150);
17886 ins_encode %{
17887 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17888 %}
17889 ins_pipe(pipe_slow);
17890 %}
17891
17892 instruct subF_mem(regF dst, memory src) %{
17893 predicate(UseAVX == 0);
17894 match(Set dst (SubF dst (LoadF src)));
17895
17896 format %{ "subss $dst, $src" %}
17897 ins_cost(150);
17898 ins_encode %{
17899 __ subss($dst$$XMMRegister, $src$$Address);
17900 %}
17901 ins_pipe(pipe_slow);
17902 %}
17903
17904 instruct subF_imm(regF dst, immF con) %{
17905 predicate(UseAVX == 0);
17906 match(Set dst (SubF dst con));
17907 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17908 ins_cost(150);
17909 ins_encode %{
17910 __ subss($dst$$XMMRegister, $constantaddress($con));
17911 %}
17912 ins_pipe(pipe_slow);
17913 %}
17914
17915 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17916 predicate(UseAVX > 0);
17917 match(Set dst (SubF src1 src2));
17918
17919 format %{ "vsubss $dst, $src1, $src2" %}
17920 ins_cost(150);
17921 ins_encode %{
17922 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17923 %}
17924 ins_pipe(pipe_slow);
17925 %}
17926
17927 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17928 predicate(UseAVX > 0);
17929 match(Set dst (SubF src1 (LoadF src2)));
17930
17931 format %{ "vsubss $dst, $src1, $src2" %}
17932 ins_cost(150);
17933 ins_encode %{
17934 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17935 %}
17936 ins_pipe(pipe_slow);
17937 %}
17938
17939 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17940 predicate(UseAVX > 0);
17941 match(Set dst (SubF src con));
17942
17943 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17944 ins_cost(150);
17945 ins_encode %{
17946 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17947 %}
17948 ins_pipe(pipe_slow);
17949 %}
17950
17951 instruct subD_reg(regD dst, regD src) %{
17952 predicate(UseAVX == 0);
17953 match(Set dst (SubD dst src));
17954
17955 format %{ "subsd $dst, $src" %}
17956 ins_cost(150);
17957 ins_encode %{
17958 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17959 %}
17960 ins_pipe(pipe_slow);
17961 %}
17962
17963 instruct subD_mem(regD dst, memory src) %{
17964 predicate(UseAVX == 0);
17965 match(Set dst (SubD dst (LoadD src)));
17966
17967 format %{ "subsd $dst, $src" %}
17968 ins_cost(150);
17969 ins_encode %{
17970 __ subsd($dst$$XMMRegister, $src$$Address);
17971 %}
17972 ins_pipe(pipe_slow);
17973 %}
17974
17975 instruct subD_imm(regD dst, immD con) %{
17976 predicate(UseAVX == 0);
17977 match(Set dst (SubD dst con));
17978 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17979 ins_cost(150);
17980 ins_encode %{
17981 __ subsd($dst$$XMMRegister, $constantaddress($con));
17982 %}
17983 ins_pipe(pipe_slow);
17984 %}
17985
17986 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17987 predicate(UseAVX > 0);
17988 match(Set dst (SubD src1 src2));
17989
17990 format %{ "vsubsd $dst, $src1, $src2" %}
17991 ins_cost(150);
17992 ins_encode %{
17993 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17994 %}
17995 ins_pipe(pipe_slow);
17996 %}
17997
17998 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17999 predicate(UseAVX > 0);
18000 match(Set dst (SubD src1 (LoadD src2)));
18001
18002 format %{ "vsubsd $dst, $src1, $src2" %}
18003 ins_cost(150);
18004 ins_encode %{
18005 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18006 %}
18007 ins_pipe(pipe_slow);
18008 %}
18009
18010 instruct subD_reg_imm(regD dst, regD src, immD con) %{
18011 predicate(UseAVX > 0);
18012 match(Set dst (SubD src con));
18013
18014 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18015 ins_cost(150);
18016 ins_encode %{
18017 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18018 %}
18019 ins_pipe(pipe_slow);
18020 %}
18021
18022 instruct mulF_reg(regF dst, regF src) %{
18023 predicate(UseAVX == 0);
18024 match(Set dst (MulF dst src));
18025
18026 format %{ "mulss $dst, $src" %}
18027 ins_cost(150);
18028 ins_encode %{
18029 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
18030 %}
18031 ins_pipe(pipe_slow);
18032 %}
18033
18034 instruct mulF_mem(regF dst, memory src) %{
18035 predicate(UseAVX == 0);
18036 match(Set dst (MulF dst (LoadF src)));
18037
18038 format %{ "mulss $dst, $src" %}
18039 ins_cost(150);
18040 ins_encode %{
18041 __ mulss($dst$$XMMRegister, $src$$Address);
18042 %}
18043 ins_pipe(pipe_slow);
18044 %}
18045
18046 instruct mulF_imm(regF dst, immF con) %{
18047 predicate(UseAVX == 0);
18048 match(Set dst (MulF dst con));
18049 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18050 ins_cost(150);
18051 ins_encode %{
18052 __ mulss($dst$$XMMRegister, $constantaddress($con));
18053 %}
18054 ins_pipe(pipe_slow);
18055 %}
18056
18057 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
18058 predicate(UseAVX > 0);
18059 match(Set dst (MulF src1 src2));
18060
18061 format %{ "vmulss $dst, $src1, $src2" %}
18062 ins_cost(150);
18063 ins_encode %{
18064 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18065 %}
18066 ins_pipe(pipe_slow);
18067 %}
18068
18069 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
18070 predicate(UseAVX > 0);
18071 match(Set dst (MulF src1 (LoadF src2)));
18072
18073 format %{ "vmulss $dst, $src1, $src2" %}
18074 ins_cost(150);
18075 ins_encode %{
18076 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18077 %}
18078 ins_pipe(pipe_slow);
18079 %}
18080
18081 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
18082 predicate(UseAVX > 0);
18083 match(Set dst (MulF src con));
18084
18085 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18086 ins_cost(150);
18087 ins_encode %{
18088 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18089 %}
18090 ins_pipe(pipe_slow);
18091 %}
18092
18093 instruct mulD_reg(regD dst, regD src) %{
18094 predicate(UseAVX == 0);
18095 match(Set dst (MulD dst src));
18096
18097 format %{ "mulsd $dst, $src" %}
18098 ins_cost(150);
18099 ins_encode %{
18100 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
18101 %}
18102 ins_pipe(pipe_slow);
18103 %}
18104
18105 instruct mulD_mem(regD dst, memory src) %{
18106 predicate(UseAVX == 0);
18107 match(Set dst (MulD dst (LoadD src)));
18108
18109 format %{ "mulsd $dst, $src" %}
18110 ins_cost(150);
18111 ins_encode %{
18112 __ mulsd($dst$$XMMRegister, $src$$Address);
18113 %}
18114 ins_pipe(pipe_slow);
18115 %}
18116
18117 instruct mulD_imm(regD dst, immD con) %{
18118 predicate(UseAVX == 0);
18119 match(Set dst (MulD dst con));
18120 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18121 ins_cost(150);
18122 ins_encode %{
18123 __ mulsd($dst$$XMMRegister, $constantaddress($con));
18124 %}
18125 ins_pipe(pipe_slow);
18126 %}
18127
18128 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
18129 predicate(UseAVX > 0);
18130 match(Set dst (MulD src1 src2));
18131
18132 format %{ "vmulsd $dst, $src1, $src2" %}
18133 ins_cost(150);
18134 ins_encode %{
18135 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18136 %}
18137 ins_pipe(pipe_slow);
18138 %}
18139
18140 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18141 predicate(UseAVX > 0);
18142 match(Set dst (MulD src1 (LoadD src2)));
18143
18144 format %{ "vmulsd $dst, $src1, $src2" %}
18145 ins_cost(150);
18146 ins_encode %{
18147 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18148 %}
18149 ins_pipe(pipe_slow);
18150 %}
18151
18152 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18153 predicate(UseAVX > 0);
18154 match(Set dst (MulD src con));
18155
18156 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18157 ins_cost(150);
18158 ins_encode %{
18159 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18160 %}
18161 ins_pipe(pipe_slow);
18162 %}
18163
18164 instruct divF_reg(regF dst, regF src) %{
18165 predicate(UseAVX == 0);
18166 match(Set dst (DivF dst src));
18167
18168 format %{ "divss $dst, $src" %}
18169 ins_cost(150);
18170 ins_encode %{
18171 __ divss($dst$$XMMRegister, $src$$XMMRegister);
18172 %}
18173 ins_pipe(pipe_slow);
18174 %}
18175
18176 instruct divF_mem(regF dst, memory src) %{
18177 predicate(UseAVX == 0);
18178 match(Set dst (DivF dst (LoadF src)));
18179
18180 format %{ "divss $dst, $src" %}
18181 ins_cost(150);
18182 ins_encode %{
18183 __ divss($dst$$XMMRegister, $src$$Address);
18184 %}
18185 ins_pipe(pipe_slow);
18186 %}
18187
18188 instruct divF_imm(regF dst, immF con) %{
18189 predicate(UseAVX == 0);
18190 match(Set dst (DivF dst con));
18191 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18192 ins_cost(150);
18193 ins_encode %{
18194 __ divss($dst$$XMMRegister, $constantaddress($con));
18195 %}
18196 ins_pipe(pipe_slow);
18197 %}
18198
18199 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18200 predicate(UseAVX > 0);
18201 match(Set dst (DivF src1 src2));
18202
18203 format %{ "vdivss $dst, $src1, $src2" %}
18204 ins_cost(150);
18205 ins_encode %{
18206 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18207 %}
18208 ins_pipe(pipe_slow);
18209 %}
18210
18211 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18212 predicate(UseAVX > 0);
18213 match(Set dst (DivF src1 (LoadF src2)));
18214
18215 format %{ "vdivss $dst, $src1, $src2" %}
18216 ins_cost(150);
18217 ins_encode %{
18218 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18219 %}
18220 ins_pipe(pipe_slow);
18221 %}
18222
18223 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18224 predicate(UseAVX > 0);
18225 match(Set dst (DivF src con));
18226
18227 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18228 ins_cost(150);
18229 ins_encode %{
18230 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18231 %}
18232 ins_pipe(pipe_slow);
18233 %}
18234
18235 instruct divD_reg(regD dst, regD src) %{
18236 predicate(UseAVX == 0);
18237 match(Set dst (DivD dst src));
18238
18239 format %{ "divsd $dst, $src" %}
18240 ins_cost(150);
18241 ins_encode %{
18242 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18243 %}
18244 ins_pipe(pipe_slow);
18245 %}
18246
18247 instruct divD_mem(regD dst, memory src) %{
18248 predicate(UseAVX == 0);
18249 match(Set dst (DivD dst (LoadD src)));
18250
18251 format %{ "divsd $dst, $src" %}
18252 ins_cost(150);
18253 ins_encode %{
18254 __ divsd($dst$$XMMRegister, $src$$Address);
18255 %}
18256 ins_pipe(pipe_slow);
18257 %}
18258
18259 instruct divD_imm(regD dst, immD con) %{
18260 predicate(UseAVX == 0);
18261 match(Set dst (DivD dst con));
18262 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18263 ins_cost(150);
18264 ins_encode %{
18265 __ divsd($dst$$XMMRegister, $constantaddress($con));
18266 %}
18267 ins_pipe(pipe_slow);
18268 %}
18269
18270 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18271 predicate(UseAVX > 0);
18272 match(Set dst (DivD src1 src2));
18273
18274 format %{ "vdivsd $dst, $src1, $src2" %}
18275 ins_cost(150);
18276 ins_encode %{
18277 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18278 %}
18279 ins_pipe(pipe_slow);
18280 %}
18281
18282 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18283 predicate(UseAVX > 0);
18284 match(Set dst (DivD src1 (LoadD src2)));
18285
18286 format %{ "vdivsd $dst, $src1, $src2" %}
18287 ins_cost(150);
18288 ins_encode %{
18289 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18290 %}
18291 ins_pipe(pipe_slow);
18292 %}
18293
18294 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18295 predicate(UseAVX > 0);
18296 match(Set dst (DivD src con));
18297
18298 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18299 ins_cost(150);
18300 ins_encode %{
18301 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18302 %}
18303 ins_pipe(pipe_slow);
18304 %}
18305
18306 instruct absF_reg(regF dst) %{
18307 predicate(UseAVX == 0);
18308 match(Set dst (AbsF dst));
18309 ins_cost(150);
18310 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18311 ins_encode %{
18312 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18313 %}
18314 ins_pipe(pipe_slow);
18315 %}
18316
18317 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18318 predicate(UseAVX > 0);
18319 match(Set dst (AbsF src));
18320 ins_cost(150);
18321 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18322 ins_encode %{
18323 int vlen_enc = Assembler::AVX_128bit;
18324 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18325 ExternalAddress(float_signmask()), vlen_enc);
18326 %}
18327 ins_pipe(pipe_slow);
18328 %}
18329
18330 instruct absD_reg(regD dst) %{
18331 predicate(UseAVX == 0);
18332 match(Set dst (AbsD dst));
18333 ins_cost(150);
18334 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18335 "# abs double by sign masking" %}
18336 ins_encode %{
18337 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18338 %}
18339 ins_pipe(pipe_slow);
18340 %}
18341
18342 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18343 predicate(UseAVX > 0);
18344 match(Set dst (AbsD src));
18345 ins_cost(150);
18346 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18347 "# abs double by sign masking" %}
18348 ins_encode %{
18349 int vlen_enc = Assembler::AVX_128bit;
18350 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18351 ExternalAddress(double_signmask()), vlen_enc);
18352 %}
18353 ins_pipe(pipe_slow);
18354 %}
18355
18356 instruct negF_reg(regF dst) %{
18357 predicate(UseAVX == 0);
18358 match(Set dst (NegF dst));
18359 ins_cost(150);
18360 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18361 ins_encode %{
18362 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18363 %}
18364 ins_pipe(pipe_slow);
18365 %}
18366
18367 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18368 predicate(UseAVX > 0);
18369 match(Set dst (NegF src));
18370 ins_cost(150);
18371 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18372 ins_encode %{
18373 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18374 ExternalAddress(float_signflip()));
18375 %}
18376 ins_pipe(pipe_slow);
18377 %}
18378
18379 instruct negD_reg(regD dst) %{
18380 predicate(UseAVX == 0);
18381 match(Set dst (NegD dst));
18382 ins_cost(150);
18383 format %{ "xorpd $dst, [0x8000000000000000]\t"
18384 "# neg double by sign flipping" %}
18385 ins_encode %{
18386 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18387 %}
18388 ins_pipe(pipe_slow);
18389 %}
18390
18391 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18392 predicate(UseAVX > 0);
18393 match(Set dst (NegD src));
18394 ins_cost(150);
18395 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18396 "# neg double by sign flipping" %}
18397 ins_encode %{
18398 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18399 ExternalAddress(double_signflip()));
18400 %}
18401 ins_pipe(pipe_slow);
18402 %}
18403
// The sqrtss instruction needs its destination register to be pre-initialized for best
// performance (it writes only the low 32 bits, leaving a false dependency on the register's
// previous contents). Therefore only the instruct rule where the input is pre-loaded into the
// dst register is defined below.
18406 instruct sqrtF_reg(regF dst) %{
18407 match(Set dst (SqrtF dst));
18408 format %{ "sqrtss $dst, $dst" %}
18409 ins_encode %{
18410 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18411 %}
18412 ins_pipe(pipe_slow);
18413 %}
18414
// The sqrtsd instruction needs its destination register to be pre-initialized for best
// performance (it writes only the low 64 bits, leaving a false dependency on the register's
// previous contents). Therefore only the instruct rule where the input is pre-loaded into the
// dst register is defined below.
18417 instruct sqrtD_reg(regD dst) %{
18418 match(Set dst (SqrtD dst));
18419 format %{ "sqrtsd $dst, $dst" %}
18420 ins_encode %{
18421 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18422 %}
18423 ins_pipe(pipe_slow);
18424 %}
18425
18426 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18427 effect(TEMP tmp);
18428 match(Set dst (ConvF2HF src));
18429 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %}
18431 ins_encode %{
18432 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18433 %}
18434 ins_pipe( pipe_slow );
18435 %}
18436
18437 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18438 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18439 effect(TEMP ktmp, TEMP rtmp);
18440 match(Set mem (StoreC mem (ConvF2HF src)));
18441 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18442 ins_encode %{
18443 __ movl($rtmp$$Register, 0x1);
18444 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18445 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18446 %}
18447 ins_pipe( pipe_slow );
18448 %}
18449
18450 instruct vconvF2HF(vec dst, vec src) %{
18451 match(Set dst (VectorCastF2HF src));
18452 format %{ "vector_conv_F2HF $dst $src" %}
18453 ins_encode %{
18454 int vlen_enc = vector_length_encoding(this, $src);
18455 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18456 %}
18457 ins_pipe( pipe_slow );
18458 %}
18459
18460 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18461 predicate(n->as_StoreVector()->memory_size() >= 16);
18462 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18463 format %{ "vcvtps2ph $mem,$src" %}
18464 ins_encode %{
18465 int vlen_enc = vector_length_encoding(this, $src);
18466 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18467 %}
18468 ins_pipe( pipe_slow );
18469 %}
18470
18471 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18472 match(Set dst (ConvHF2F src));
18473 format %{ "vcvtph2ps $dst,$src" %}
18474 ins_encode %{
18475 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18476 %}
18477 ins_pipe( pipe_slow );
18478 %}
18479
18480 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18481 match(Set dst (VectorCastHF2F (LoadVector mem)));
18482 format %{ "vcvtph2ps $dst,$mem" %}
18483 ins_encode %{
18484 int vlen_enc = vector_length_encoding(this);
18485 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18486 %}
18487 ins_pipe( pipe_slow );
18488 %}
18489
18490 instruct vconvHF2F(vec dst, vec src) %{
18491 match(Set dst (VectorCastHF2F src));
18492 ins_cost(125);
18493 format %{ "vector_conv_HF2F $dst,$src" %}
18494 ins_encode %{
18495 int vlen_enc = vector_length_encoding(this);
18496 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18497 %}
18498 ins_pipe( pipe_slow );
18499 %}
18500
18501 // ---------------------------------------- VectorReinterpret ------------------------------------
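//
// VectorReinterpret only changes how the bits are typed. When source and destination sizes
// match it is a no-op (empty encoding); when the destination is wider, the rules below mask the
// value so the extra bytes are zero; when it is narrower, a smaller move simply truncates.
// The kReg variants convert between opmask layouts of different element widths.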
18502 instruct reinterpret_mask(kReg dst) %{
18503 predicate(n->bottom_type()->isa_vectmask() &&
18504 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18505 match(Set dst (VectorReinterpret dst));
18506 ins_cost(125);
18507 format %{ "vector_reinterpret $dst\t!" %}
18508 ins_encode %{
18509 // empty
18510 %}
18511 ins_pipe( pipe_slow );
18512 %}
18513
18514 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18515 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18516 n->bottom_type()->isa_vectmask() &&
18517 n->in(1)->bottom_type()->isa_vectmask() &&
18518 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18519 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18520 match(Set dst (VectorReinterpret src));
18521 effect(TEMP xtmp);
18522 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18523 ins_encode %{
18524 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18525 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18526 assert(src_sz == dst_sz , "src and dst size mismatch");
18527 int vlen_enc = vector_length_encoding(src_sz);
18528 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18529 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18530 %}
18531 ins_pipe( pipe_slow );
18532 %}
18533
18534 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18535 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18536 n->bottom_type()->isa_vectmask() &&
18537 n->in(1)->bottom_type()->isa_vectmask() &&
18538 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18539 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18540 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18541 match(Set dst (VectorReinterpret src));
18542 effect(TEMP xtmp);
18543 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18544 ins_encode %{
18545 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18546 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18547 assert(src_sz == dst_sz , "src and dst size mismatch");
18548 int vlen_enc = vector_length_encoding(src_sz);
18549 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18550 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18551 %}
18552 ins_pipe( pipe_slow );
18553 %}
18554
18555 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18556 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18557 n->bottom_type()->isa_vectmask() &&
18558 n->in(1)->bottom_type()->isa_vectmask() &&
18559 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18560 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18561 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18562 match(Set dst (VectorReinterpret src));
18563 effect(TEMP xtmp);
18564 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18565 ins_encode %{
18566 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18567 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18568 assert(src_sz == dst_sz , "src and dst size mismatch");
18569 int vlen_enc = vector_length_encoding(src_sz);
18570 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18571 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18572 %}
18573 ins_pipe( pipe_slow );
18574 %}
18575
18576 instruct reinterpret(vec dst) %{
18577 predicate(!n->bottom_type()->isa_vectmask() &&
18578 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18579 match(Set dst (VectorReinterpret dst));
18580 ins_cost(125);
18581 format %{ "vector_reinterpret $dst\t!" %}
18582 ins_encode %{
18583 // empty
18584 %}
18585 ins_pipe( pipe_slow );
18586 %}
18587
18588 instruct reinterpret_expand(vec dst, vec src) %{
18589 predicate(UseAVX == 0 &&
18590 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18591 match(Set dst (VectorReinterpret src));
18592 ins_cost(125);
18593 effect(TEMP dst);
18594 format %{ "vector_reinterpret_expand $dst,$src" %}
18595 ins_encode %{
18596 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18597 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18598
18599 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18600 if (src_vlen_in_bytes == 4) {
18601 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18602 } else {
18603 assert(src_vlen_in_bytes == 8, "");
18604 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18605 }
18606 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18607 %}
18608 ins_pipe( pipe_slow );
18609 %}
18610
18611 instruct vreinterpret_expand4(legVec dst, vec src) %{
18612 predicate(UseAVX > 0 &&
18613 !n->bottom_type()->isa_vectmask() &&
18614 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18615 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18616 match(Set dst (VectorReinterpret src));
18617 ins_cost(125);
18618 format %{ "vector_reinterpret_expand $dst,$src" %}
18619 ins_encode %{
18620 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18621 %}
18622 ins_pipe( pipe_slow );
18623 %}
18624
18625
18626 instruct vreinterpret_expand(legVec dst, vec src) %{
18627 predicate(UseAVX > 0 &&
18628 !n->bottom_type()->isa_vectmask() &&
18629 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18630 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18631 match(Set dst (VectorReinterpret src));
18632 ins_cost(125);
18633 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18634 ins_encode %{
18635 switch (Matcher::vector_length_in_bytes(this, $src)) {
18636 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18637 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18638 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18639 default: ShouldNotReachHere();
18640 }
18641 %}
18642 ins_pipe( pipe_slow );
18643 %}
18644
18645 instruct reinterpret_shrink(vec dst, legVec src) %{
18646 predicate(!n->bottom_type()->isa_vectmask() &&
18647 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18648 match(Set dst (VectorReinterpret src));
18649 ins_cost(125);
18650 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18651 ins_encode %{
18652 switch (Matcher::vector_length_in_bytes(this)) {
18653 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18654 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18655 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18656 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18657 default: ShouldNotReachHere();
18658 }
18659 %}
18660 ins_pipe( pipe_slow );
18661 %}
18662
18663 // ----------------------------------------------------------------------------------------------------
18664
18665 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18666 match(Set dst (RoundDoubleMode src rmode));
18667 format %{ "roundsd $dst,$src" %}
18668 ins_cost(150);
18669 ins_encode %{
18670 assert(UseSSE >= 4, "required");
18671 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18672 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18673 }
18674 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18675 %}
18676 ins_pipe(pipe_slow);
18677 %}
18678
18679 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18680 match(Set dst (RoundDoubleMode con rmode));
18681 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18682 ins_cost(150);
18683 ins_encode %{
18684 assert(UseSSE >= 4, "required");
18685 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18686 %}
18687 ins_pipe(pipe_slow);
18688 %}
18689
18690 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18691 predicate(Matcher::vector_length(n) < 8);
18692 match(Set dst (RoundDoubleModeV src rmode));
18693 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18694 ins_encode %{
18695 assert(UseAVX > 0, "required");
18696 int vlen_enc = vector_length_encoding(this);
18697 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18698 %}
18699 ins_pipe( pipe_slow );
18700 %}
18701
18702 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18703 predicate(Matcher::vector_length(n) == 8);
18704 match(Set dst (RoundDoubleModeV src rmode));
18705 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18706 ins_encode %{
18707 assert(UseAVX > 2, "required");
18708 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18709 %}
18710 ins_pipe( pipe_slow );
18711 %}
18712
18713 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18714 predicate(Matcher::vector_length(n) < 8);
18715 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18716 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18717 ins_encode %{
18718 assert(UseAVX > 0, "required");
18719 int vlen_enc = vector_length_encoding(this);
18720 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18721 %}
18722 ins_pipe( pipe_slow );
18723 %}
18724
18725 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18726 predicate(Matcher::vector_length(n) == 8);
18727 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18728 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18729 ins_encode %{
18730 assert(UseAVX > 2, "required");
18731 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18732 %}
18733 ins_pipe( pipe_slow );
18734 %}
18735
18736 instruct onspinwait() %{
18737 match(OnSpinWait);
18738 ins_cost(200);
18739
18740 format %{
18741 $$template
18742 $$emit$$"pause\t! membar_onspinwait"
18743 %}
18744 ins_encode %{
18745 __ pause();
18746 %}
18747 ins_pipe(pipe_slow);
18748 %}
18749
18750 // a * b + c
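// (FmaD/FmaF are typically reached via the Math.fma intrinsics when UseFMA is enabled; the
// fmad/fmaf macro-assembler calls emit a fused multiply-add, so a * b + c is computed with a
// single rounding.)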
18751 instruct fmaD_reg(regD a, regD b, regD c) %{
18752 match(Set c (FmaD c (Binary a b)));
18753 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18754 ins_cost(150);
18755 ins_encode %{
18756 assert(UseFMA, "Needs FMA instructions support.");
18757 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18758 %}
18759 ins_pipe( pipe_slow );
18760 %}
18761
18762 // a * b + c
18763 instruct fmaF_reg(regF a, regF b, regF c) %{
18764 match(Set c (FmaF c (Binary a b)));
18765 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18766 ins_cost(150);
18767 ins_encode %{
18768 assert(UseFMA, "Needs FMA instructions support.");
18769 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18770 %}
18771 ins_pipe( pipe_slow );
18772 %}
18773
18774 // ====================VECTOR INSTRUCTIONS=====================================
18775
18776 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18777 instruct MoveVec2Leg(legVec dst, vec src) %{
18778 match(Set dst src);
18779 format %{ "" %}
18780 ins_encode %{
18781 ShouldNotReachHere();
18782 %}
18783 ins_pipe( fpu_reg_reg );
18784 %}
18785
18786 instruct MoveLeg2Vec(vec dst, legVec src) %{
18787 match(Set dst src);
18788 format %{ "" %}
18789 ins_encode %{
18790 ShouldNotReachHere();
18791 %}
18792 ins_pipe( fpu_reg_reg );
18793 %}
18794
18795 // ============================================================================
18796
18797 // Load vectors generic operand pattern
18798 instruct loadV(vec dst, memory mem) %{
18799 match(Set dst (LoadVector mem));
18800 ins_cost(125);
18801 format %{ "load_vector $dst,$mem" %}
18802 ins_encode %{
18803 BasicType bt = Matcher::vector_element_basic_type(this);
18804 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18805 %}
18806 ins_pipe( pipe_slow );
18807 %}
18808
18809 // Store vectors generic operand pattern.
18810 instruct storeV(memory mem, vec src) %{
18811 match(Set mem (StoreVector mem src));
18812 ins_cost(145);
18813   format %{ "store_vector $mem,$src" %}
18814 ins_encode %{
18815 switch (Matcher::vector_length_in_bytes(this, $src)) {
18816 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18817 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18818 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18819 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18820 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18821 default: ShouldNotReachHere();
18822 }
18823 %}
18824 ins_pipe( pipe_slow );
18825 %}
18826
18827 // ---------------------------------------- Gather ------------------------------------
18828
18829 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18830
18831 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18832 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18833 Matcher::vector_length_in_bytes(n) <= 32);
18834 match(Set dst (LoadVectorGather mem idx));
18835 effect(TEMP dst, TEMP tmp, TEMP mask);
18836 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18837 ins_encode %{
18838 int vlen_enc = vector_length_encoding(this);
18839 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18840 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
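    // vpcmpeqd of a register with itself sets every bit, producing an all-true vector mask for this unmasked gather.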
18841 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18842 __ lea($tmp$$Register, $mem$$Address);
18843 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18844 %}
18845 ins_pipe( pipe_slow );
18846 %}
18847
18848
18849 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18850 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18851 !is_subword_type(Matcher::vector_element_basic_type(n)));
18852 match(Set dst (LoadVectorGather mem idx));
18853 effect(TEMP dst, TEMP tmp, TEMP ktmp);
18854   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18855 ins_encode %{
18856 int vlen_enc = vector_length_encoding(this);
18857 BasicType elem_bt = Matcher::vector_element_basic_type(this);
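    // kxnorwl of an opmask register with itself sets all mask bits, i.e. an all-true predicate for this unmasked gather.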
18858 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18859 __ lea($tmp$$Register, $mem$$Address);
18860 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18861 %}
18862 ins_pipe( pipe_slow );
18863 %}
18864
18865 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18866 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18867 !is_subword_type(Matcher::vector_element_basic_type(n)));
18868 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18869 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18870   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18871 ins_encode %{
18872 assert(UseAVX > 2, "sanity");
18873 int vlen_enc = vector_length_encoding(this);
18874 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18875 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18876     // Note: The gather instruction partially updates the opmask register used
18877     // for predication, hence the mask operand is copied to a temporary.
18878 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
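    // Gathers use merge-masking: lanes whose mask bit is clear keep the destination value,
    // so clear $dst first to leave unselected lanes zero.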
18879 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18880 __ lea($tmp$$Register, $mem$$Address);
18881 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18882 %}
18883 ins_pipe( pipe_slow );
18884 %}
18885
18886 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18887 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18888 match(Set dst (LoadVectorGather mem idx_base));
18889 effect(TEMP tmp, TEMP rtmp);
18890 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18891 ins_encode %{
18892 int vlen_enc = vector_length_encoding(this);
18893 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18894 __ lea($tmp$$Register, $mem$$Address);
18895 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18896 %}
18897 ins_pipe( pipe_slow );
18898 %}
18899
18900 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18901 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18902 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18903 match(Set dst (LoadVectorGather mem idx_base));
18904 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18905 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18906 ins_encode %{
18907 int vlen_enc = vector_length_encoding(this);
18908 int vector_len = Matcher::vector_length(this);
18909 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18910 __ lea($tmp$$Register, $mem$$Address);
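    // The subword gather helper advances the index pointer as it iterates, so hand it a scratch copy
    // and leave $idx_base intact.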
18911 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18912 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18913 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18914 %}
18915 ins_pipe( pipe_slow );
18916 %}
18917
18918 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18919 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18920 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18921 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18922 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18923 ins_encode %{
18924 int vlen_enc = vector_length_encoding(this);
18925 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18926 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18927 __ lea($tmp$$Register, $mem$$Address);
18928 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18929 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18930 %}
18931 ins_pipe( pipe_slow );
18932 %}
18933
18934 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18935 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18936 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18937 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18938 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18939 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18940 ins_encode %{
18941 int vlen_enc = vector_length_encoding(this);
18942 int vector_len = Matcher::vector_length(this);
18943 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18944 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18945 __ lea($tmp$$Register, $mem$$Address);
18946 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18947 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18948 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18949 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18950 %}
18951 ins_pipe( pipe_slow );
18952 %}
18953
18954 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18955 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18956 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18957 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18958 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18959 ins_encode %{
18960 int vlen_enc = vector_length_encoding(this);
18961 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18962 __ lea($tmp$$Register, $mem$$Address);
18963 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
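    // vpmovmskb yields one mask bit per byte; for shorts, the pext with 0x55555555 below keeps every
    // other bit so the mask ends up with one bit per 16-bit element.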
18964 if (elem_bt == T_SHORT) {
18965 __ movl($mask_idx$$Register, 0x55555555);
18966 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18967 }
18968 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18969 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18970 %}
18971 ins_pipe( pipe_slow );
18972 %}
18973
18974 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18975 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18976 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18977 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18978 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18979 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18980 ins_encode %{
18981 int vlen_enc = vector_length_encoding(this);
18982 int vector_len = Matcher::vector_length(this);
18983 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18984 __ lea($tmp$$Register, $mem$$Address);
18985 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18986 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
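    // vpmovmskb yields one mask bit per byte; for shorts, the pext with 0x55555555 below keeps every
    // other bit so the mask ends up with one bit per 16-bit element.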
18987 if (elem_bt == T_SHORT) {
18988 __ movl($mask_idx$$Register, 0x55555555);
18989 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18990 }
18991 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18992 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18993 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18994 %}
18995 ins_pipe( pipe_slow );
18996 %}
18997
18998 // ====================Scatter=======================================
18999
19000 // Scatter INT, LONG, FLOAT, DOUBLE
19001
19002 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
19003 predicate(UseAVX > 2);
19004 match(Set mem (StoreVectorScatter mem (Binary src idx)));
19005 effect(TEMP tmp, TEMP ktmp);
19006   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
19007 ins_encode %{
19008 int vlen_enc = vector_length_encoding(this, $src);
19009 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19010
19011 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19012 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19013
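    // Load an all-ones opmask from the constant table so that every lane of $src is scattered.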
19014 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
19015 __ lea($tmp$$Register, $mem$$Address);
19016 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19017 %}
19018 ins_pipe( pipe_slow );
19019 %}
19020
19021 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
19022 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
19023 effect(TEMP tmp, TEMP ktmp);
19024 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
19025 ins_encode %{
19026 int vlen_enc = vector_length_encoding(this, $src);
19027 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19028 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19029 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19030     // Note: The scatter instruction partially updates the opmask register used
19031     // for predication, hence the mask operand is copied to a temporary.
19032 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19033 __ lea($tmp$$Register, $mem$$Address);
19034 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19035 %}
19036 ins_pipe( pipe_slow );
19037 %}
19038
19039 // ====================REPLICATE=======================================
19040
19041 // Replicate byte scalar to be vector
19042 instruct vReplB_reg(vec dst, rRegI src) %{
19043 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19044 match(Set dst (Replicate src));
19045 format %{ "replicateB $dst,$src" %}
19046 ins_encode %{
19047 uint vlen = Matcher::vector_length(this);
19048 if (UseAVX >= 2) {
19049 int vlen_enc = vector_length_encoding(this);
19050 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19051 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19052 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19053 } else {
19054 __ movdl($dst$$XMMRegister, $src$$Register);
19055 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19056 }
19057 } else {
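      // Pre-AVX2 fallback: duplicate the byte into a 16-bit word, splat that word across the low quadword,
      // and for 16-byte vectors copy the low quadword into the upper half.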
19058 assert(UseAVX < 2, "");
19059 __ movdl($dst$$XMMRegister, $src$$Register);
19060 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19061 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19062 if (vlen >= 16) {
19063 assert(vlen == 16, "");
19064 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19065 }
19066 }
19067 %}
19068 ins_pipe( pipe_slow );
19069 %}
19070
19071 instruct ReplB_mem(vec dst, memory mem) %{
19072 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19073 match(Set dst (Replicate (LoadB mem)));
19074 format %{ "replicateB $dst,$mem" %}
19075 ins_encode %{
19076 int vlen_enc = vector_length_encoding(this);
19077 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19078 %}
19079 ins_pipe( pipe_slow );
19080 %}
19081
19082 // ====================ReplicateS=======================================
19083
19084 instruct vReplS_reg(vec dst, rRegI src) %{
19085 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19086 match(Set dst (Replicate src));
19087 format %{ "replicateS $dst,$src" %}
19088 ins_encode %{
19089 uint vlen = Matcher::vector_length(this);
19090 int vlen_enc = vector_length_encoding(this);
19091 if (UseAVX >= 2) {
19092 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19093 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19094 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19095 } else {
19096 __ movdl($dst$$XMMRegister, $src$$Register);
19097 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19098 }
19099 } else {
19100 assert(UseAVX < 2, "");
19101 __ movdl($dst$$XMMRegister, $src$$Register);
19102 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19103 if (vlen >= 8) {
19104 assert(vlen == 8, "");
19105 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19106 }
19107 }
19108 %}
19109 ins_pipe( pipe_slow );
19110 %}
19111
19112 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19113 match(Set dst (Replicate con));
19114 effect(TEMP rtmp);
19115 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
19116 ins_encode %{
19117 int vlen_enc = vector_length_encoding(this);
19118 BasicType bt = Matcher::vector_element_basic_type(this);
19119 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19120 __ movl($rtmp$$Register, $con$$constant);
19121 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19122 %}
19123 ins_pipe( pipe_slow );
19124 %}
19125
19126 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19127 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19128 match(Set dst (Replicate src));
19129 effect(TEMP rtmp);
19130 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
19131 ins_encode %{
19132 int vlen_enc = vector_length_encoding(this);
19133 __ vmovw($rtmp$$Register, $src$$XMMRegister);
19134 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19135 %}
19136 ins_pipe( pipe_slow );
19137 %}
19138
19139 instruct ReplS_mem(vec dst, memory mem) %{
19140 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19141 match(Set dst (Replicate (LoadS mem)));
19142 format %{ "replicateS $dst,$mem" %}
19143 ins_encode %{
19144 int vlen_enc = vector_length_encoding(this);
19145 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19146 %}
19147 ins_pipe( pipe_slow );
19148 %}
19149
19150 // ====================ReplicateI=======================================
19151
19152 instruct ReplI_reg(vec dst, rRegI src) %{
19153 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19154 match(Set dst (Replicate src));
19155 format %{ "replicateI $dst,$src" %}
19156 ins_encode %{
19157 uint vlen = Matcher::vector_length(this);
19158 int vlen_enc = vector_length_encoding(this);
19159 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19160 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19161 } else if (VM_Version::supports_avx2()) {
19162 __ movdl($dst$$XMMRegister, $src$$Register);
19163 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19164 } else {
19165 __ movdl($dst$$XMMRegister, $src$$Register);
19166 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19167 }
19168 %}
19169 ins_pipe( pipe_slow );
19170 %}
19171
19172 instruct ReplI_mem(vec dst, memory mem) %{
19173 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19174 match(Set dst (Replicate (LoadI mem)));
19175 format %{ "replicateI $dst,$mem" %}
19176 ins_encode %{
19177 int vlen_enc = vector_length_encoding(this);
19178 if (VM_Version::supports_avx2()) {
19179 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19180 } else if (VM_Version::supports_avx()) {
19181 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19182 } else {
19183 __ movdl($dst$$XMMRegister, $mem$$Address);
19184 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19185 }
19186 %}
19187 ins_pipe( pipe_slow );
19188 %}
19189
19190 instruct ReplI_imm(vec dst, immI con) %{
19191 predicate(Matcher::is_non_long_integral_vector(n));
19192 match(Set dst (Replicate con));
19193 format %{ "replicateI $dst,$con" %}
19194 ins_encode %{
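    // Only as much of the replicated pattern is placed in the constant table as the cheapest available
    // broadcast needs: 4 bytes with AVX, 8 bytes with SSE3 (movddup), otherwise the full 16-byte pattern;
    // dividing by the element size turns that byte count into an element count for vreplicate_imm.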
19195 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19196 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19197 type2aelembytes(Matcher::vector_element_basic_type(this))));
19198 BasicType bt = Matcher::vector_element_basic_type(this);
19199 int vlen = Matcher::vector_length_in_bytes(this);
19200 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19201 %}
19202 ins_pipe( pipe_slow );
19203 %}
19204
19205 // Replicate scalar zero to be vector
19206 instruct ReplI_zero(vec dst, immI_0 zero) %{
19207 predicate(Matcher::is_non_long_integral_vector(n));
19208 match(Set dst (Replicate zero));
19209 format %{ "replicateI $dst,$zero" %}
19210 ins_encode %{
19211 int vlen_enc = vector_length_encoding(this);
19212 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19213 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19214 } else {
19215 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19216 }
19217 %}
19218 ins_pipe( fpu_reg_reg );
19219 %}
19220
19221 instruct ReplI_M1(vec dst, immI_M1 con) %{
19222 predicate(Matcher::is_non_long_integral_vector(n));
19223 match(Set dst (Replicate con));
19224 format %{ "vallones $dst" %}
19225 ins_encode %{
19226 int vector_len = vector_length_encoding(this);
19227 __ vallones($dst$$XMMRegister, vector_len);
19228 %}
19229 ins_pipe( pipe_slow );
19230 %}
19231
19232 // ====================ReplicateL=======================================
19233
19234 // Replicate long (8 byte) scalar to be vector
19235 instruct ReplL_reg(vec dst, rRegL src) %{
19236 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19237 match(Set dst (Replicate src));
19238 format %{ "replicateL $dst,$src" %}
19239 ins_encode %{
19240 int vlen = Matcher::vector_length(this);
19241 int vlen_enc = vector_length_encoding(this);
19242 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19243 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19244 } else if (VM_Version::supports_avx2()) {
19245 __ movdq($dst$$XMMRegister, $src$$Register);
19246 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19247 } else {
19248 __ movdq($dst$$XMMRegister, $src$$Register);
19249 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19250 }
19251 %}
19252 ins_pipe( pipe_slow );
19253 %}
19254
19255 instruct ReplL_mem(vec dst, memory mem) %{
19256 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19257 match(Set dst (Replicate (LoadL mem)));
19258 format %{ "replicateL $dst,$mem" %}
19259 ins_encode %{
19260 int vlen_enc = vector_length_encoding(this);
19261 if (VM_Version::supports_avx2()) {
19262 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19263 } else if (VM_Version::supports_sse3()) {
19264 __ movddup($dst$$XMMRegister, $mem$$Address);
19265 } else {
19266 __ movq($dst$$XMMRegister, $mem$$Address);
19267 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19268 }
19269 %}
19270 ins_pipe( pipe_slow );
19271 %}
19272
19273 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19274 instruct ReplL_imm(vec dst, immL con) %{
19275 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19276 match(Set dst (Replicate con));
19277 format %{ "replicateL $dst,$con" %}
19278 ins_encode %{
19279 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19280 int vlen = Matcher::vector_length_in_bytes(this);
19281 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19282 %}
19283 ins_pipe( pipe_slow );
19284 %}
19285
19286 instruct ReplL_zero(vec dst, immL0 zero) %{
19287 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19288 match(Set dst (Replicate zero));
19289 format %{ "replicateL $dst,$zero" %}
19290 ins_encode %{
19291 int vlen_enc = vector_length_encoding(this);
19292 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19293 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19294 } else {
19295 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19296 }
19297 %}
19298 ins_pipe( fpu_reg_reg );
19299 %}
19300
19301 instruct ReplL_M1(vec dst, immL_M1 con) %{
19302 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19303 match(Set dst (Replicate con));
19304 format %{ "vallones $dst" %}
19305 ins_encode %{
19306 int vector_len = vector_length_encoding(this);
19307 __ vallones($dst$$XMMRegister, vector_len);
19308 %}
19309 ins_pipe( pipe_slow );
19310 %}
19311
19312 // ====================ReplicateF=======================================
19313
19314 instruct vReplF_reg(vec dst, vlRegF src) %{
19315 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19316 match(Set dst (Replicate src));
19317 format %{ "replicateF $dst,$src" %}
19318 ins_encode %{
19319 uint vlen = Matcher::vector_length(this);
19320 int vlen_enc = vector_length_encoding(this);
19321 if (vlen <= 4) {
19322 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19323 } else if (VM_Version::supports_avx2()) {
19324 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19325 } else {
19326 assert(vlen == 8, "sanity");
19327 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19328 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19329 }
19330 %}
19331 ins_pipe( pipe_slow );
19332 %}
19333
19334 instruct ReplF_reg(vec dst, vlRegF src) %{
19335 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19336 match(Set dst (Replicate src));
19337 format %{ "replicateF $dst,$src" %}
19338 ins_encode %{
19339 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19340 %}
19341 ins_pipe( pipe_slow );
19342 %}
19343
19344 instruct ReplF_mem(vec dst, memory mem) %{
19345 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19346 match(Set dst (Replicate (LoadF mem)));
19347 format %{ "replicateF $dst,$mem" %}
19348 ins_encode %{
19349 int vlen_enc = vector_length_encoding(this);
19350 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19351 %}
19352 ins_pipe( pipe_slow );
19353 %}
19354
19355 // Replicate float scalar immediate to be vector by loading from const table.
19356 instruct ReplF_imm(vec dst, immF con) %{
19357 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19358 match(Set dst (Replicate con));
19359 format %{ "replicateF $dst,$con" %}
19360 ins_encode %{
19361 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19362 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19363 int vlen = Matcher::vector_length_in_bytes(this);
19364 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19365 %}
19366 ins_pipe( pipe_slow );
19367 %}
19368
19369 instruct ReplF_zero(vec dst, immF0 zero) %{
19370 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19371 match(Set dst (Replicate zero));
19372 format %{ "replicateF $dst,$zero" %}
19373 ins_encode %{
19374 int vlen_enc = vector_length_encoding(this);
19375 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19376 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19377 } else {
19378 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19379 }
19380 %}
19381 ins_pipe( fpu_reg_reg );
19382 %}
19383
19384 // ====================ReplicateD=======================================
19385
19386 // Replicate double (8 bytes) scalar to be vector
19387 instruct vReplD_reg(vec dst, vlRegD src) %{
19388 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19389 match(Set dst (Replicate src));
19390 format %{ "replicateD $dst,$src" %}
19391 ins_encode %{
19392 uint vlen = Matcher::vector_length(this);
19393 int vlen_enc = vector_length_encoding(this);
19394 if (vlen <= 2) {
19395 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19396 } else if (VM_Version::supports_avx2()) {
19397 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19398 } else {
19399 assert(vlen == 4, "sanity");
19400 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19401 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19402 }
19403 %}
19404 ins_pipe( pipe_slow );
19405 %}
19406
19407 instruct ReplD_reg(vec dst, vlRegD src) %{
19408 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19409 match(Set dst (Replicate src));
19410 format %{ "replicateD $dst,$src" %}
19411 ins_encode %{
19412 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19413 %}
19414 ins_pipe( pipe_slow );
19415 %}
19416
19417 instruct ReplD_mem(vec dst, memory mem) %{
19418 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19419 match(Set dst (Replicate (LoadD mem)));
19420 format %{ "replicateD $dst,$mem" %}
19421 ins_encode %{
19422 if (Matcher::vector_length(this) >= 4) {
19423 int vlen_enc = vector_length_encoding(this);
19424 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19425 } else {
19426 __ movddup($dst$$XMMRegister, $mem$$Address);
19427 }
19428 %}
19429 ins_pipe( pipe_slow );
19430 %}
19431
19432 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19433 instruct ReplD_imm(vec dst, immD con) %{
19434 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19435 match(Set dst (Replicate con));
19436 format %{ "replicateD $dst,$con" %}
19437 ins_encode %{
19438 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19439 int vlen = Matcher::vector_length_in_bytes(this);
19440 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19441 %}
19442 ins_pipe( pipe_slow );
19443 %}
19444
19445 instruct ReplD_zero(vec dst, immD0 zero) %{
19446 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19447 match(Set dst (Replicate zero));
19448 format %{ "replicateD $dst,$zero" %}
19449 ins_encode %{
19450 int vlen_enc = vector_length_encoding(this);
19451 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19452 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19453 } else {
19454 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19455 }
19456 %}
19457 ins_pipe( fpu_reg_reg );
19458 %}
19459
19460 // ====================VECTOR INSERT=======================================
19461
19462 instruct insert(vec dst, rRegI val, immU8 idx) %{
19463 predicate(Matcher::vector_length_in_bytes(n) < 32);
19464 match(Set dst (VectorInsert (Binary dst val) idx));
19465 format %{ "vector_insert $dst,$val,$idx" %}
19466 ins_encode %{
19467 assert(UseSSE >= 4, "required");
19468 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19469
19470 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19471
19472 assert(is_integral_type(elem_bt), "");
19473 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19474
19475 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19476 %}
19477 ins_pipe( pipe_slow );
19478 %}
19479
19480 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19481 predicate(Matcher::vector_length_in_bytes(n) == 32);
19482 match(Set dst (VectorInsert (Binary src val) idx));
19483 effect(TEMP vtmp);
19484 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19485 ins_encode %{
19486 int vlen_enc = Assembler::AVX_256bit;
19487 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19488 int elem_per_lane = 16/type2aelembytes(elem_bt);
19489 int log2epr = log2(elem_per_lane);
19490
19491 assert(is_integral_type(elem_bt), "sanity");
19492 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19493
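    // Split the element index into a 128-bit lane number (y_idx) and a position within that lane (x_idx),
    // then patch only that lane.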
19494 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19495 uint y_idx = ($idx$$constant >> log2epr) & 1;
19496 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19497 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19498 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19499 %}
19500 ins_pipe( pipe_slow );
19501 %}
19502
19503 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19504 predicate(Matcher::vector_length_in_bytes(n) == 64);
19505 match(Set dst (VectorInsert (Binary src val) idx));
19506 effect(TEMP vtmp);
19507 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19508 ins_encode %{
19509 assert(UseAVX > 2, "sanity");
19510
19511 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19512 int elem_per_lane = 16/type2aelembytes(elem_bt);
19513 int log2epr = log2(elem_per_lane);
19514
19515 assert(is_integral_type(elem_bt), "");
19516 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19517
19518 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19519 uint y_idx = ($idx$$constant >> log2epr) & 3;
19520 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19521 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19522 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19523 %}
19524 ins_pipe( pipe_slow );
19525 %}
19526
19527 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19528 predicate(Matcher::vector_length(n) == 2);
19529 match(Set dst (VectorInsert (Binary dst val) idx));
19530 format %{ "vector_insert $dst,$val,$idx" %}
19531 ins_encode %{
19532 assert(UseSSE >= 4, "required");
19533 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19534 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19535
19536 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19537 %}
19538 ins_pipe( pipe_slow );
19539 %}
19540
19541 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19542 predicate(Matcher::vector_length(n) == 4);
19543 match(Set dst (VectorInsert (Binary src val) idx));
19544 effect(TEMP vtmp);
19545 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19546 ins_encode %{
19547 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19548 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19549
19550 uint x_idx = $idx$$constant & right_n_bits(1);
19551 uint y_idx = ($idx$$constant >> 1) & 1;
19552 int vlen_enc = Assembler::AVX_256bit;
19553 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19554 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19555 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19556 %}
19557 ins_pipe( pipe_slow );
19558 %}
19559
19560 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19561 predicate(Matcher::vector_length(n) == 8);
19562 match(Set dst (VectorInsert (Binary src val) idx));
19563 effect(TEMP vtmp);
19564 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19565 ins_encode %{
19566 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19567 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19568
19569 uint x_idx = $idx$$constant & right_n_bits(1);
19570 uint y_idx = ($idx$$constant >> 1) & 3;
19571 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19572 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19573 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19574 %}
19575 ins_pipe( pipe_slow );
19576 %}
19577
19578 instruct insertF(vec dst, regF val, immU8 idx) %{
19579 predicate(Matcher::vector_length(n) < 8);
19580 match(Set dst (VectorInsert (Binary dst val) idx));
19581 format %{ "vector_insert $dst,$val,$idx" %}
19582 ins_encode %{
19583 assert(UseSSE >= 4, "sanity");
19584
19585 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19586 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19587
19588 uint x_idx = $idx$$constant & right_n_bits(2);
19589 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19590 %}
19591 ins_pipe( pipe_slow );
19592 %}
19593
19594 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19595 predicate(Matcher::vector_length(n) >= 8);
19596 match(Set dst (VectorInsert (Binary src val) idx));
19597 effect(TEMP vtmp);
19598 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19599 ins_encode %{
19600 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19601 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19602
19603 int vlen = Matcher::vector_length(this);
19604 uint x_idx = $idx$$constant & right_n_bits(2);
19605 if (vlen == 8) {
19606 uint y_idx = ($idx$$constant >> 2) & 1;
19607 int vlen_enc = Assembler::AVX_256bit;
19608 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19609 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19610 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19611 } else {
19612 assert(vlen == 16, "sanity");
19613 uint y_idx = ($idx$$constant >> 2) & 3;
19614 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19615 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19616 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19617 }
19618 %}
19619 ins_pipe( pipe_slow );
19620 %}
19621
19622 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19623 predicate(Matcher::vector_length(n) == 2);
19624 match(Set dst (VectorInsert (Binary dst val) idx));
19625 effect(TEMP tmp);
19626 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19627 ins_encode %{
19628 assert(UseSSE >= 4, "sanity");
19629 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19630 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19631
19632 __ movq($tmp$$Register, $val$$XMMRegister);
19633 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19634 %}
19635 ins_pipe( pipe_slow );
19636 %}
19637
19638 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19639 predicate(Matcher::vector_length(n) == 4);
19640 match(Set dst (VectorInsert (Binary src val) idx));
19641 effect(TEMP vtmp, TEMP tmp);
19642 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19643 ins_encode %{
19644 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19645 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19646
19647 uint x_idx = $idx$$constant & right_n_bits(1);
19648 uint y_idx = ($idx$$constant >> 1) & 1;
19649 int vlen_enc = Assembler::AVX_256bit;
19650 __ movq($tmp$$Register, $val$$XMMRegister);
19651 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19652 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19653 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19654 %}
19655 ins_pipe( pipe_slow );
19656 %}
19657
19658 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19659 predicate(Matcher::vector_length(n) == 8);
19660 match(Set dst (VectorInsert (Binary src val) idx));
19661 effect(TEMP tmp, TEMP vtmp);
19662   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19663 ins_encode %{
19664 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19665 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19666
19667 uint x_idx = $idx$$constant & right_n_bits(1);
19668 uint y_idx = ($idx$$constant >> 1) & 3;
19669 __ movq($tmp$$Register, $val$$XMMRegister);
19670 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19671 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19672 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19673 %}
19674 ins_pipe( pipe_slow );
19675 %}
19676
19677 // ====================REDUCTION ARITHMETIC=======================================
19678
19679 // =======================Int Reduction==========================================
19680
19681 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19682 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19683 match(Set dst (AddReductionVI src1 src2));
19684 match(Set dst (MulReductionVI src1 src2));
19685 match(Set dst (AndReductionV src1 src2));
19686 match(Set dst ( OrReductionV src1 src2));
19687 match(Set dst (XorReductionV src1 src2));
19688 match(Set dst (MinReductionV src1 src2));
19689 match(Set dst (MaxReductionV src1 src2));
19690 match(Set dst (UMinReductionV src1 src2));
19691 match(Set dst (UMaxReductionV src1 src2));
19692 effect(TEMP vtmp1, TEMP vtmp2);
19693 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19694 ins_encode %{
19695 int opcode = this->ideal_Opcode();
19696 int vlen = Matcher::vector_length(this, $src2);
19697 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19698 %}
19699 ins_pipe( pipe_slow );
19700 %}
19701
19702 // =======================Long Reduction==========================================
19703
19704 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19705 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19706 match(Set dst (AddReductionVL src1 src2));
19707 match(Set dst (MulReductionVL src1 src2));
19708 match(Set dst (AndReductionV src1 src2));
19709 match(Set dst ( OrReductionV src1 src2));
19710 match(Set dst (XorReductionV src1 src2));
19711 match(Set dst (MinReductionV src1 src2));
19712 match(Set dst (MaxReductionV src1 src2));
19713 match(Set dst (UMinReductionV src1 src2));
19714 match(Set dst (UMaxReductionV src1 src2));
19715 effect(TEMP vtmp1, TEMP vtmp2);
19716 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19717 ins_encode %{
19718 int opcode = this->ideal_Opcode();
19719 int vlen = Matcher::vector_length(this, $src2);
19720 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19721 %}
19722 ins_pipe( pipe_slow );
19723 %}
19724
19725 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19726 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19727 match(Set dst (AddReductionVL src1 src2));
19728 match(Set dst (MulReductionVL src1 src2));
19729 match(Set dst (AndReductionV src1 src2));
19730 match(Set dst ( OrReductionV src1 src2));
19731 match(Set dst (XorReductionV src1 src2));
19732 match(Set dst (MinReductionV src1 src2));
19733 match(Set dst (MaxReductionV src1 src2));
19734 match(Set dst (UMinReductionV src1 src2));
19735 match(Set dst (UMaxReductionV src1 src2));
19736 effect(TEMP vtmp1, TEMP vtmp2);
19737 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19738 ins_encode %{
19739 int opcode = this->ideal_Opcode();
19740 int vlen = Matcher::vector_length(this, $src2);
19741 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19742 %}
19743 ins_pipe( pipe_slow );
19744 %}
19745
19746 // =======================Float Reduction==========================================
19747
19748 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19749 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19750 match(Set dst (AddReductionVF dst src));
19751 match(Set dst (MulReductionVF dst src));
19752 effect(TEMP dst, TEMP vtmp);
19753 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19754 ins_encode %{
19755 int opcode = this->ideal_Opcode();
19756 int vlen = Matcher::vector_length(this, $src);
19757 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19758 %}
19759 ins_pipe( pipe_slow );
19760 %}
19761
19762 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19763 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19764 match(Set dst (AddReductionVF dst src));
19765 match(Set dst (MulReductionVF dst src));
19766 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19767 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19768 ins_encode %{
19769 int opcode = this->ideal_Opcode();
19770 int vlen = Matcher::vector_length(this, $src);
19771 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19772 %}
19773 ins_pipe( pipe_slow );
19774 %}
19775
19776 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19777 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19778 match(Set dst (AddReductionVF dst src));
19779 match(Set dst (MulReductionVF dst src));
19780 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19781 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19782 ins_encode %{
19783 int opcode = this->ideal_Opcode();
19784 int vlen = Matcher::vector_length(this, $src);
19785 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19786 %}
19787 ins_pipe( pipe_slow );
19788 %}
19789
19790
19791 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19792 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19793 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19794 // src1 contains reduction identity
19795 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19796 match(Set dst (AddReductionVF src1 src2));
19797 match(Set dst (MulReductionVF src1 src2));
19798 effect(TEMP dst);
19799 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19800 ins_encode %{
19801 int opcode = this->ideal_Opcode();
19802 int vlen = Matcher::vector_length(this, $src2);
19803 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19804 %}
19805 ins_pipe( pipe_slow );
19806 %}
19807
19808 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19809 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19810 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19811 // src1 contains reduction identity
19812 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19813 match(Set dst (AddReductionVF src1 src2));
19814 match(Set dst (MulReductionVF src1 src2));
19815 effect(TEMP dst, TEMP vtmp);
19816 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19817 ins_encode %{
19818 int opcode = this->ideal_Opcode();
19819 int vlen = Matcher::vector_length(this, $src2);
19820 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19821 %}
19822 ins_pipe( pipe_slow );
19823 %}
19824
19825 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19826 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19827 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19828 // src1 contains reduction identity
19829 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19830 match(Set dst (AddReductionVF src1 src2));
19831 match(Set dst (MulReductionVF src1 src2));
19832 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19833 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19834 ins_encode %{
19835 int opcode = this->ideal_Opcode();
19836 int vlen = Matcher::vector_length(this, $src2);
19837 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19838 %}
19839 ins_pipe( pipe_slow );
19840 %}
19841
19842 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19843 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19844 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19845 // src1 contains reduction identity
19846 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19847 match(Set dst (AddReductionVF src1 src2));
19848 match(Set dst (MulReductionVF src1 src2));
19849 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19850 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19851 ins_encode %{
19852 int opcode = this->ideal_Opcode();
19853 int vlen = Matcher::vector_length(this, $src2);
19854 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19855 %}
19856 ins_pipe( pipe_slow );
19857 %}
19858
19859 // =======================Double Reduction==========================================
19860
19861 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19862 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19863 match(Set dst (AddReductionVD dst src));
19864 match(Set dst (MulReductionVD dst src));
19865 effect(TEMP dst, TEMP vtmp);
19866 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19867 ins_encode %{
19868 int opcode = this->ideal_Opcode();
19869 int vlen = Matcher::vector_length(this, $src);
19870 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19871 %}
19872 ins_pipe( pipe_slow );
19873 %}
19874
19875 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19876 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19877 match(Set dst (AddReductionVD dst src));
19878 match(Set dst (MulReductionVD dst src));
19879 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19880 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19881 ins_encode %{
19882 int opcode = this->ideal_Opcode();
19883 int vlen = Matcher::vector_length(this, $src);
19884 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19885 %}
19886 ins_pipe( pipe_slow );
19887 %}
19888
19889 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19890 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19891 match(Set dst (AddReductionVD dst src));
19892 match(Set dst (MulReductionVD dst src));
19893 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19894 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19895 ins_encode %{
19896 int opcode = this->ideal_Opcode();
19897 int vlen = Matcher::vector_length(this, $src);
19898 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19899 %}
19900 ins_pipe( pipe_slow );
19901 %}
19902
19903 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19904 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19905 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19906 // src1 contains reduction identity
19907 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19908 match(Set dst (AddReductionVD src1 src2));
19909 match(Set dst (MulReductionVD src1 src2));
19910 effect(TEMP dst);
19911 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19912 ins_encode %{
19913 int opcode = this->ideal_Opcode();
19914 int vlen = Matcher::vector_length(this, $src2);
19915 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19916 %}
19917 ins_pipe( pipe_slow );
19918 %}
19919
19920 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19921 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19922 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19923 // src1 contains reduction identity
19924 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19925 match(Set dst (AddReductionVD src1 src2));
19926 match(Set dst (MulReductionVD src1 src2));
19927 effect(TEMP dst, TEMP vtmp);
19928 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19929 ins_encode %{
19930 int opcode = this->ideal_Opcode();
19931 int vlen = Matcher::vector_length(this, $src2);
19932 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19933 %}
19934 ins_pipe( pipe_slow );
19935 %}
19936
19937 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19938 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19939 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19940 // src1 contains reduction identity
19941 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19942 match(Set dst (AddReductionVD src1 src2));
19943 match(Set dst (MulReductionVD src1 src2));
19944 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19945 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19946 ins_encode %{
19947 int opcode = this->ideal_Opcode();
19948 int vlen = Matcher::vector_length(this, $src2);
19949 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19950 %}
19951 ins_pipe( pipe_slow );
19952 %}
19953
19954 // =======================Byte Reduction==========================================
19955
19956 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19957 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19958 match(Set dst (AddReductionVI src1 src2));
19959 match(Set dst (AndReductionV src1 src2));
19960 match(Set dst ( OrReductionV src1 src2));
19961 match(Set dst (XorReductionV src1 src2));
19962 match(Set dst (MinReductionV src1 src2));
19963 match(Set dst (MaxReductionV src1 src2));
19964 match(Set dst (UMinReductionV src1 src2));
19965 match(Set dst (UMaxReductionV src1 src2));
19966 effect(TEMP vtmp1, TEMP vtmp2);
19967 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19968 ins_encode %{
19969 int opcode = this->ideal_Opcode();
19970 int vlen = Matcher::vector_length(this, $src2);
19971 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19972 %}
19973 ins_pipe( pipe_slow );
19974 %}
19975
19976 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19977 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19978 match(Set dst (AddReductionVI src1 src2));
19979 match(Set dst (AndReductionV src1 src2));
19980 match(Set dst ( OrReductionV src1 src2));
19981 match(Set dst (XorReductionV src1 src2));
19982 match(Set dst (MinReductionV src1 src2));
19983 match(Set dst (MaxReductionV src1 src2));
19984 match(Set dst (UMinReductionV src1 src2));
19985 match(Set dst (UMaxReductionV src1 src2));
19986 effect(TEMP vtmp1, TEMP vtmp2);
19987 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19988 ins_encode %{
19989 int opcode = this->ideal_Opcode();
19990 int vlen = Matcher::vector_length(this, $src2);
19991 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19992 %}
19993 ins_pipe( pipe_slow );
19994 %}
19995
19996 // =======================Short Reduction==========================================
19997
19998 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19999 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
20000 match(Set dst (AddReductionVI src1 src2));
20001 match(Set dst (MulReductionVI src1 src2));
20002 match(Set dst (AndReductionV src1 src2));
20003 match(Set dst ( OrReductionV src1 src2));
20004 match(Set dst (XorReductionV src1 src2));
20005 match(Set dst (MinReductionV src1 src2));
20006 match(Set dst (MaxReductionV src1 src2));
20007 match(Set dst (UMinReductionV src1 src2));
20008 match(Set dst (UMaxReductionV src1 src2));
20009 effect(TEMP vtmp1, TEMP vtmp2);
20010 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20011 ins_encode %{
20012 int opcode = this->ideal_Opcode();
20013 int vlen = Matcher::vector_length(this, $src2);
20014 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20015 %}
20016 ins_pipe( pipe_slow );
20017 %}
20018
20019 // =======================Mul Reduction==========================================
20020
20021 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20022 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20023 Matcher::vector_length(n->in(2)) <= 32); // src2
20024 match(Set dst (MulReductionVI src1 src2));
20025 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20026 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20027 ins_encode %{
20028 int opcode = this->ideal_Opcode();
20029 int vlen = Matcher::vector_length(this, $src2);
20030 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20031 %}
20032 ins_pipe( pipe_slow );
20033 %}
20034
20035 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20036 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20037 Matcher::vector_length(n->in(2)) == 64); // src2
20038 match(Set dst (MulReductionVI src1 src2));
20039 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20040 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20041 ins_encode %{
20042 int opcode = this->ideal_Opcode();
20043 int vlen = Matcher::vector_length(this, $src2);
20044 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20045 %}
20046 ins_pipe( pipe_slow );
20047 %}
20048
20049 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
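// These rules only apply when src1 is the neutral identity for the operation
// (+Inf for min, -Inf for max, checked in the predicate), so the scalar input can be
// ignored and only src2 has to be reduced. The *_av variants further below handle the
// case where the accumulator in dst is a live input instead.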
20051 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20052 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20053 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20054 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20055 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20056 Matcher::vector_length(n->in(2)) == 2);
20057 match(Set dst (MinReductionV src1 src2));
20058 match(Set dst (MaxReductionV src1 src2));
20059 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20060 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20061 ins_encode %{
20062 assert(UseAVX > 0, "sanity");
20063
20064 int opcode = this->ideal_Opcode();
20065 int vlen = Matcher::vector_length(this, $src2);
20066 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20067 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20068 %}
20069 ins_pipe( pipe_slow );
20070 %}
20071
20072 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20073 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20074 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20075 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20076 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20077 Matcher::vector_length(n->in(2)) >= 4);
20078 match(Set dst (MinReductionV src1 src2));
20079 match(Set dst (MaxReductionV src1 src2));
20080 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20081 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20082 ins_encode %{
20083 assert(UseAVX > 0, "sanity");
20084
20085 int opcode = this->ideal_Opcode();
20086 int vlen = Matcher::vector_length(this, $src2);
20087 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20088 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20089 %}
20090 ins_pipe( pipe_slow );
20091 %}
20092
20093 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
20094 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20095 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20096 Matcher::vector_length(n->in(2)) == 2);
20097 match(Set dst (MinReductionV dst src));
20098 match(Set dst (MaxReductionV dst src));
20099 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20100 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20101 ins_encode %{
20102 assert(UseAVX > 0, "sanity");
20103
20104 int opcode = this->ideal_Opcode();
20105 int vlen = Matcher::vector_length(this, $src);
20106 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20107 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20108 %}
20109 ins_pipe( pipe_slow );
20110 %}
20111
20112
20113 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
20114 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20115 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20116 Matcher::vector_length(n->in(2)) >= 4);
20117 match(Set dst (MinReductionV dst src));
20118 match(Set dst (MaxReductionV dst src));
20119 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20120 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20121 ins_encode %{
20122 assert(UseAVX > 0, "sanity");
20123
20124 int opcode = this->ideal_Opcode();
20125 int vlen = Matcher::vector_length(this, $src);
20126 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20127 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20128 %}
20129 ins_pipe( pipe_slow );
20130 %}
20131
20132 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
20133 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20134 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20135 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20136 Matcher::vector_length(n->in(2)) == 2);
20137 match(Set dst (MinReductionV src1 src2));
20138 match(Set dst (MaxReductionV src1 src2));
20139 effect(TEMP dst, TEMP xtmp1);
20140 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
20141 ins_encode %{
20142 int opcode = this->ideal_Opcode();
20143 int vlen = Matcher::vector_length(this, $src2);
20144 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20145 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20146 %}
20147 ins_pipe( pipe_slow );
20148 %}
20149
20150 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
20151 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20152 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20153 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20154 Matcher::vector_length(n->in(2)) >= 4);
20155 match(Set dst (MinReductionV src1 src2));
20156 match(Set dst (MaxReductionV src1 src2));
20157 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20158 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20159 ins_encode %{
20160 int opcode = this->ideal_Opcode();
20161 int vlen = Matcher::vector_length(this, $src2);
20162 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20163 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20164 %}
20165 ins_pipe( pipe_slow );
20166 %}
20167
20168 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20169 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20170 Matcher::vector_length(n->in(2)) == 2);
20171 match(Set dst (MinReductionV dst src));
20172 match(Set dst (MaxReductionV dst src));
20173 effect(TEMP dst, TEMP xtmp1);
20174 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20175 ins_encode %{
20176 int opcode = this->ideal_Opcode();
20177 int vlen = Matcher::vector_length(this, $src);
20178 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20179 $xtmp1$$XMMRegister);
20180 %}
20181 ins_pipe( pipe_slow );
20182 %}
20183
20184 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20185 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20186 Matcher::vector_length(n->in(2)) >= 4);
20187 match(Set dst (MinReductionV dst src));
20188 match(Set dst (MaxReductionV dst src));
20189 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20190 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20191 ins_encode %{
20192 int opcode = this->ideal_Opcode();
20193 int vlen = Matcher::vector_length(this, $src);
20194 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20195 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20196 %}
20197 ins_pipe( pipe_slow );
20198 %}
20199
//--------------------Min/Max Double Reduction --------------------
20201 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20202 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20203 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20204 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20205 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20206 Matcher::vector_length(n->in(2)) == 2);
20207 match(Set dst (MinReductionV src1 src2));
20208 match(Set dst (MaxReductionV src1 src2));
20209 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20210 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20211 ins_encode %{
20212 assert(UseAVX > 0, "sanity");
20213
20214 int opcode = this->ideal_Opcode();
20215 int vlen = Matcher::vector_length(this, $src2);
20216 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20217 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20218 %}
20219 ins_pipe( pipe_slow );
20220 %}
20221
20222 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20223 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20224 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20225 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20226 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20227 Matcher::vector_length(n->in(2)) >= 4);
20228 match(Set dst (MinReductionV src1 src2));
20229 match(Set dst (MaxReductionV src1 src2));
20230 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20231 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20232 ins_encode %{
20233 assert(UseAVX > 0, "sanity");
20234
20235 int opcode = this->ideal_Opcode();
20236 int vlen = Matcher::vector_length(this, $src2);
20237 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20238 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20239 %}
20240 ins_pipe( pipe_slow );
20241 %}
20242
20243
20244 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20245 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20246 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20247 Matcher::vector_length(n->in(2)) == 2);
20248 match(Set dst (MinReductionV dst src));
20249 match(Set dst (MaxReductionV dst src));
20250 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20251 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20252 ins_encode %{
20253 assert(UseAVX > 0, "sanity");
20254
20255 int opcode = this->ideal_Opcode();
20256 int vlen = Matcher::vector_length(this, $src);
20257 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20258 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20259 %}
20260 ins_pipe( pipe_slow );
20261 %}
20262
20263 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20264 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20265 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20266 Matcher::vector_length(n->in(2)) >= 4);
20267 match(Set dst (MinReductionV dst src));
20268 match(Set dst (MaxReductionV dst src));
20269 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20270 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20271 ins_encode %{
20272 assert(UseAVX > 0, "sanity");
20273
20274 int opcode = this->ideal_Opcode();
20275 int vlen = Matcher::vector_length(this, $src);
20276 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20277 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20278 %}
20279 ins_pipe( pipe_slow );
20280 %}
20281
20282 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20283 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20284 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20285 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20286 Matcher::vector_length(n->in(2)) == 2);
20287 match(Set dst (MinReductionV src1 src2));
20288 match(Set dst (MaxReductionV src1 src2));
20289 effect(TEMP dst, TEMP xtmp1);
20290 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20291 ins_encode %{
20292 int opcode = this->ideal_Opcode();
20293 int vlen = Matcher::vector_length(this, $src2);
20294 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20295 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20296 %}
20297 ins_pipe( pipe_slow );
20298 %}
20299
20300 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20301 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20302 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20303 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20304 Matcher::vector_length(n->in(2)) >= 4);
20305 match(Set dst (MinReductionV src1 src2));
20306 match(Set dst (MaxReductionV src1 src2));
20307 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20308 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20309 ins_encode %{
20310 int opcode = this->ideal_Opcode();
20311 int vlen = Matcher::vector_length(this, $src2);
20312 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20313 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20314 %}
20315 ins_pipe( pipe_slow );
20316 %}
20317
20318
20319 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20320 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20321 Matcher::vector_length(n->in(2)) == 2);
20322 match(Set dst (MinReductionV dst src));
20323 match(Set dst (MaxReductionV dst src));
20324 effect(TEMP dst, TEMP xtmp1);
20325 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20326 ins_encode %{
20327 int opcode = this->ideal_Opcode();
20328 int vlen = Matcher::vector_length(this, $src);
20329 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20330 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20331 %}
20332 ins_pipe( pipe_slow );
20333 %}
20334
20335 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20336 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20337 Matcher::vector_length(n->in(2)) >= 4);
20338 match(Set dst (MinReductionV dst src));
20339 match(Set dst (MaxReductionV dst src));
20340 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20341 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20342 ins_encode %{
20343 int opcode = this->ideal_Opcode();
20344 int vlen = Matcher::vector_length(this, $src);
20345 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20346 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20347 %}
20348 ins_pipe( pipe_slow );
20349 %}
20350
20351 // ====================VECTOR ARITHMETIC=======================================
20352
20353 // --------------------------------- ADD --------------------------------------
20354
20355 // Bytes vector add
20356 instruct vaddB(vec dst, vec src) %{
20357 predicate(UseAVX == 0);
20358 match(Set dst (AddVB dst src));
20359 format %{ "paddb $dst,$src\t! add packedB" %}
20360 ins_encode %{
20361 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20362 %}
20363 ins_pipe( pipe_slow );
20364 %}
20365
20366 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20367 predicate(UseAVX > 0);
20368 match(Set dst (AddVB src1 src2));
20369 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20370 ins_encode %{
20371 int vlen_enc = vector_length_encoding(this);
20372 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20373 %}
20374 ins_pipe( pipe_slow );
20375 %}
20376
20377 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20378 predicate((UseAVX > 0) &&
20379 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20380 match(Set dst (AddVB src (LoadVector mem)));
20381 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20382 ins_encode %{
20383 int vlen_enc = vector_length_encoding(this);
20384 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20385 %}
20386 ins_pipe( pipe_slow );
20387 %}
20388
20389 // Shorts/Chars vector add
20390 instruct vaddS(vec dst, vec src) %{
20391 predicate(UseAVX == 0);
20392 match(Set dst (AddVS dst src));
20393 format %{ "paddw $dst,$src\t! add packedS" %}
20394 ins_encode %{
20395 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20396 %}
20397 ins_pipe( pipe_slow );
20398 %}
20399
20400 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20401 predicate(UseAVX > 0);
20402 match(Set dst (AddVS src1 src2));
20403 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20404 ins_encode %{
20405 int vlen_enc = vector_length_encoding(this);
20406 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20407 %}
20408 ins_pipe( pipe_slow );
20409 %}
20410
20411 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20412 predicate((UseAVX > 0) &&
20413 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20414 match(Set dst (AddVS src (LoadVector mem)));
20415 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20416 ins_encode %{
20417 int vlen_enc = vector_length_encoding(this);
20418 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20419 %}
20420 ins_pipe( pipe_slow );
20421 %}
20422
20423 // Integers vector add
20424 instruct vaddI(vec dst, vec src) %{
20425 predicate(UseAVX == 0);
20426 match(Set dst (AddVI dst src));
20427 format %{ "paddd $dst,$src\t! add packedI" %}
20428 ins_encode %{
20429 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20430 %}
20431 ins_pipe( pipe_slow );
20432 %}
20433
20434 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20435 predicate(UseAVX > 0);
20436 match(Set dst (AddVI src1 src2));
20437 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20438 ins_encode %{
20439 int vlen_enc = vector_length_encoding(this);
20440 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20441 %}
20442 ins_pipe( pipe_slow );
20443 %}
20444
20445
20446 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20447 predicate((UseAVX > 0) &&
20448 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20449 match(Set dst (AddVI src (LoadVector mem)));
20450 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20451 ins_encode %{
20452 int vlen_enc = vector_length_encoding(this);
20453 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20454 %}
20455 ins_pipe( pipe_slow );
20456 %}
20457
20458 // Longs vector add
20459 instruct vaddL(vec dst, vec src) %{
20460 predicate(UseAVX == 0);
20461 match(Set dst (AddVL dst src));
20462 format %{ "paddq $dst,$src\t! add packedL" %}
20463 ins_encode %{
20464 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20465 %}
20466 ins_pipe( pipe_slow );
20467 %}
20468
20469 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20470 predicate(UseAVX > 0);
20471 match(Set dst (AddVL src1 src2));
20472 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20473 ins_encode %{
20474 int vlen_enc = vector_length_encoding(this);
20475 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20476 %}
20477 ins_pipe( pipe_slow );
20478 %}
20479
20480 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20481 predicate((UseAVX > 0) &&
20482 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20483 match(Set dst (AddVL src (LoadVector mem)));
20484 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20485 ins_encode %{
20486 int vlen_enc = vector_length_encoding(this);
20487 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20488 %}
20489 ins_pipe( pipe_slow );
20490 %}
20491
20492 // Floats vector add
20493 instruct vaddF(vec dst, vec src) %{
20494 predicate(UseAVX == 0);
20495 match(Set dst (AddVF dst src));
20496 format %{ "addps $dst,$src\t! add packedF" %}
20497 ins_encode %{
20498 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20499 %}
20500 ins_pipe( pipe_slow );
20501 %}
20502
20503 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20504 predicate(UseAVX > 0);
20505 match(Set dst (AddVF src1 src2));
20506 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20507 ins_encode %{
20508 int vlen_enc = vector_length_encoding(this);
20509 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20510 %}
20511 ins_pipe( pipe_slow );
20512 %}
20513
20514 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20515 predicate((UseAVX > 0) &&
20516 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20517 match(Set dst (AddVF src (LoadVector mem)));
20518 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20519 ins_encode %{
20520 int vlen_enc = vector_length_encoding(this);
20521 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20522 %}
20523 ins_pipe( pipe_slow );
20524 %}
20525
20526 // Doubles vector add
20527 instruct vaddD(vec dst, vec src) %{
20528 predicate(UseAVX == 0);
20529 match(Set dst (AddVD dst src));
20530 format %{ "addpd $dst,$src\t! add packedD" %}
20531 ins_encode %{
20532 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20533 %}
20534 ins_pipe( pipe_slow );
20535 %}
20536
20537 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20538 predicate(UseAVX > 0);
20539 match(Set dst (AddVD src1 src2));
20540 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20541 ins_encode %{
20542 int vlen_enc = vector_length_encoding(this);
20543 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20544 %}
20545 ins_pipe( pipe_slow );
20546 %}
20547
20548 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20549 predicate((UseAVX > 0) &&
20550 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20551 match(Set dst (AddVD src (LoadVector mem)));
20552 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20553 ins_encode %{
20554 int vlen_enc = vector_length_encoding(this);
20555 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20556 %}
20557 ins_pipe( pipe_slow );
20558 %}
20559
20560 // --------------------------------- SUB --------------------------------------
20561
20562 // Bytes vector sub
20563 instruct vsubB(vec dst, vec src) %{
20564 predicate(UseAVX == 0);
20565 match(Set dst (SubVB dst src));
20566 format %{ "psubb $dst,$src\t! sub packedB" %}
20567 ins_encode %{
20568 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20569 %}
20570 ins_pipe( pipe_slow );
20571 %}
20572
20573 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20574 predicate(UseAVX > 0);
20575 match(Set dst (SubVB src1 src2));
20576 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20577 ins_encode %{
20578 int vlen_enc = vector_length_encoding(this);
20579 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20580 %}
20581 ins_pipe( pipe_slow );
20582 %}
20583
20584 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20585 predicate((UseAVX > 0) &&
20586 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20587 match(Set dst (SubVB src (LoadVector mem)));
20588 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20589 ins_encode %{
20590 int vlen_enc = vector_length_encoding(this);
20591 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20592 %}
20593 ins_pipe( pipe_slow );
20594 %}
20595
20596 // Shorts/Chars vector sub
20597 instruct vsubS(vec dst, vec src) %{
20598 predicate(UseAVX == 0);
20599 match(Set dst (SubVS dst src));
20600 format %{ "psubw $dst,$src\t! sub packedS" %}
20601 ins_encode %{
20602 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20603 %}
20604 ins_pipe( pipe_slow );
20605 %}
20606
20607
20608 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20609 predicate(UseAVX > 0);
20610 match(Set dst (SubVS src1 src2));
20611 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20612 ins_encode %{
20613 int vlen_enc = vector_length_encoding(this);
20614 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20615 %}
20616 ins_pipe( pipe_slow );
20617 %}
20618
20619 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20620 predicate((UseAVX > 0) &&
20621 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20622 match(Set dst (SubVS src (LoadVector mem)));
20623 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20624 ins_encode %{
20625 int vlen_enc = vector_length_encoding(this);
20626 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20627 %}
20628 ins_pipe( pipe_slow );
20629 %}
20630
20631 // Integers vector sub
20632 instruct vsubI(vec dst, vec src) %{
20633 predicate(UseAVX == 0);
20634 match(Set dst (SubVI dst src));
20635 format %{ "psubd $dst,$src\t! sub packedI" %}
20636 ins_encode %{
20637 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20638 %}
20639 ins_pipe( pipe_slow );
20640 %}
20641
20642 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20643 predicate(UseAVX > 0);
20644 match(Set dst (SubVI src1 src2));
20645 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20646 ins_encode %{
20647 int vlen_enc = vector_length_encoding(this);
20648 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20649 %}
20650 ins_pipe( pipe_slow );
20651 %}
20652
20653 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20654 predicate((UseAVX > 0) &&
20655 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20656 match(Set dst (SubVI src (LoadVector mem)));
20657 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20658 ins_encode %{
20659 int vlen_enc = vector_length_encoding(this);
20660 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20661 %}
20662 ins_pipe( pipe_slow );
20663 %}
20664
20665 // Longs vector sub
20666 instruct vsubL(vec dst, vec src) %{
20667 predicate(UseAVX == 0);
20668 match(Set dst (SubVL dst src));
20669 format %{ "psubq $dst,$src\t! sub packedL" %}
20670 ins_encode %{
20671 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20672 %}
20673 ins_pipe( pipe_slow );
20674 %}
20675
20676 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20677 predicate(UseAVX > 0);
20678 match(Set dst (SubVL src1 src2));
20679 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20680 ins_encode %{
20681 int vlen_enc = vector_length_encoding(this);
20682 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20683 %}
20684 ins_pipe( pipe_slow );
20685 %}
20686
20687
20688 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20689 predicate((UseAVX > 0) &&
20690 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20691 match(Set dst (SubVL src (LoadVector mem)));
20692 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20693 ins_encode %{
20694 int vlen_enc = vector_length_encoding(this);
20695 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20696 %}
20697 ins_pipe( pipe_slow );
20698 %}
20699
20700 // Floats vector sub
20701 instruct vsubF(vec dst, vec src) %{
20702 predicate(UseAVX == 0);
20703 match(Set dst (SubVF dst src));
20704 format %{ "subps $dst,$src\t! sub packedF" %}
20705 ins_encode %{
20706 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20707 %}
20708 ins_pipe( pipe_slow );
20709 %}
20710
20711 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20712 predicate(UseAVX > 0);
20713 match(Set dst (SubVF src1 src2));
20714 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20715 ins_encode %{
20716 int vlen_enc = vector_length_encoding(this);
20717 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20718 %}
20719 ins_pipe( pipe_slow );
20720 %}
20721
20722 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20723 predicate((UseAVX > 0) &&
20724 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20725 match(Set dst (SubVF src (LoadVector mem)));
20726 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20727 ins_encode %{
20728 int vlen_enc = vector_length_encoding(this);
20729 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20730 %}
20731 ins_pipe( pipe_slow );
20732 %}
20733
20734 // Doubles vector sub
20735 instruct vsubD(vec dst, vec src) %{
20736 predicate(UseAVX == 0);
20737 match(Set dst (SubVD dst src));
20738 format %{ "subpd $dst,$src\t! sub packedD" %}
20739 ins_encode %{
20740 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20741 %}
20742 ins_pipe( pipe_slow );
20743 %}
20744
20745 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20746 predicate(UseAVX > 0);
20747 match(Set dst (SubVD src1 src2));
20748 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20749 ins_encode %{
20750 int vlen_enc = vector_length_encoding(this);
20751 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20752 %}
20753 ins_pipe( pipe_slow );
20754 %}
20755
20756 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20757 predicate((UseAVX > 0) &&
20758 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20759 match(Set dst (SubVD src (LoadVector mem)));
20760 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20761 ins_encode %{
20762 int vlen_enc = vector_length_encoding(this);
20763 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20764 %}
20765 ins_pipe( pipe_slow );
20766 %}
20767
20768 // --------------------------------- MUL --------------------------------------
20769
20770 // Byte vector mul
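// SSE/AVX have no packed byte multiply, so the byte products are formed in 16-bit
// lanes (either by widening the bytes or by handling the odd/even byte positions
// separately) and the low byte of each 16-bit product is then recombined.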
20771 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20772 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20773 match(Set dst (MulVB src1 src2));
20774 effect(TEMP dst, TEMP xtmp);
20775 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20776 ins_encode %{
20777 assert(UseSSE > 3, "required");
20778 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20779 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20780 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20781 __ psllw($dst$$XMMRegister, 8);
20782 __ psrlw($dst$$XMMRegister, 8);
20783 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20784 %}
20785 ins_pipe( pipe_slow );
20786 %}
20787
20788 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20789 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20790 match(Set dst (MulVB src1 src2));
20791 effect(TEMP dst, TEMP xtmp);
20792 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20793 ins_encode %{
20794 assert(UseSSE > 3, "required");
20795 // Odd-index elements
20796 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20797 __ psrlw($dst$$XMMRegister, 8);
20798 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20799 __ psrlw($xtmp$$XMMRegister, 8);
20800 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20801 __ psllw($dst$$XMMRegister, 8);
20802 // Even-index elements
20803 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20804 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20805 __ psllw($xtmp$$XMMRegister, 8);
20806 __ psrlw($xtmp$$XMMRegister, 8);
20807 // Combine
20808 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20809 %}
20810 ins_pipe( pipe_slow );
20811 %}
20812
20813 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20814 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20815 match(Set dst (MulVB src1 src2));
20816 effect(TEMP xtmp1, TEMP xtmp2);
20817 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20818 ins_encode %{
20819 int vlen_enc = vector_length_encoding(this);
20820 // Odd-index elements
20821 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20822 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20823 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20824 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20825 // Even-index elements
20826 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20827 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20828 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20829 // Combine
20830 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20831 %}
20832 ins_pipe( pipe_slow );
20833 %}
20834
20835 // Shorts/Chars vector mul
20836 instruct vmulS(vec dst, vec src) %{
20837 predicate(UseAVX == 0);
20838 match(Set dst (MulVS dst src));
20839 format %{ "pmullw $dst,$src\t! mul packedS" %}
20840 ins_encode %{
20841 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20842 %}
20843 ins_pipe( pipe_slow );
20844 %}
20845
20846 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20847 predicate(UseAVX > 0);
20848 match(Set dst (MulVS src1 src2));
20849 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20850 ins_encode %{
20851 int vlen_enc = vector_length_encoding(this);
20852 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20853 %}
20854 ins_pipe( pipe_slow );
20855 %}
20856
20857 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20858 predicate((UseAVX > 0) &&
20859 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20860 match(Set dst (MulVS src (LoadVector mem)));
20861 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20862 ins_encode %{
20863 int vlen_enc = vector_length_encoding(this);
20864 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20865 %}
20866 ins_pipe( pipe_slow );
20867 %}
20868
20869 // Integers vector mul
20870 instruct vmulI(vec dst, vec src) %{
20871 predicate(UseAVX == 0);
20872 match(Set dst (MulVI dst src));
20873 format %{ "pmulld $dst,$src\t! mul packedI" %}
20874 ins_encode %{
20875 assert(UseSSE > 3, "required");
20876 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20877 %}
20878 ins_pipe( pipe_slow );
20879 %}
20880
20881 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20882 predicate(UseAVX > 0);
20883 match(Set dst (MulVI src1 src2));
20884 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20885 ins_encode %{
20886 int vlen_enc = vector_length_encoding(this);
20887 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20888 %}
20889 ins_pipe( pipe_slow );
20890 %}
20891
20892 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20893 predicate((UseAVX > 0) &&
20894 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20895 match(Set dst (MulVI src (LoadVector mem)));
20896 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20897 ins_encode %{
20898 int vlen_enc = vector_length_encoding(this);
20899 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20900 %}
20901 ins_pipe( pipe_slow );
20902 %}
20903
20904 // Longs vector mul
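// evpmullq needs AVX512DQ, plus AVX512VL when the vector is narrower than 512 bits;
// without that support the 64-bit multiply is synthesized from 32x32-bit pieces below.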
20905 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20906 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20907 VM_Version::supports_avx512dq()) ||
20908 VM_Version::supports_avx512vldq());
20909 match(Set dst (MulVL src1 src2));
20910 ins_cost(500);
20911 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20912 ins_encode %{
20913 assert(UseAVX > 2, "required");
20914 int vlen_enc = vector_length_encoding(this);
20915 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20916 %}
20917 ins_pipe( pipe_slow );
20918 %}
20919
20920 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20921 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20922 VM_Version::supports_avx512dq()) ||
20923 (Matcher::vector_length_in_bytes(n) > 8 &&
20924 VM_Version::supports_avx512vldq()));
20925 match(Set dst (MulVL src (LoadVector mem)));
20926 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20927 ins_cost(500);
20928 ins_encode %{
20929 assert(UseAVX > 2, "required");
20930 int vlen_enc = vector_length_encoding(this);
20931 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20932 %}
20933 ins_pipe( pipe_slow );
20934 %}
20935
20936 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20937 predicate(UseAVX == 0);
20938 match(Set dst (MulVL src1 src2));
20939 ins_cost(500);
20940 effect(TEMP dst, TEMP xtmp);
20941 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20942 ins_encode %{
20943 assert(VM_Version::supports_sse4_1(), "required");
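    // For each 64-bit lane, with a = (a_hi << 32) + a_lo and b = (b_hi << 32) + b_lo:
    //   a * b mod 2^64 = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)
    // The cross products are summed in 32-bit lanes and shifted up, then the full
    // 64-bit lo*lo product is added.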
    // Get the lo-hi products; only their lower 32 bits are of concern
20945 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20946 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20947 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20948 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20949 __ psllq($dst$$XMMRegister, 32);
20950 // Get the lo-lo products
20951 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20952 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20953 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20954 %}
20955 ins_pipe( pipe_slow );
20956 %}
20957
20958 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20959 predicate(UseAVX > 0 &&
20960 ((Matcher::vector_length_in_bytes(n) == 64 &&
20961 !VM_Version::supports_avx512dq()) ||
20962 (Matcher::vector_length_in_bytes(n) < 64 &&
20963 !VM_Version::supports_avx512vldq())));
20964 match(Set dst (MulVL src1 src2));
20965 effect(TEMP xtmp1, TEMP xtmp2);
20966 ins_cost(500);
20967 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20968 ins_encode %{
20969 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only their lower 32 bits are of concern
20971 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20972 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20973 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20974 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20975 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20976 // Get the lo-lo products
20977 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20978 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20979 %}
20980 ins_pipe( pipe_slow );
20981 %}
20982
20983 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20984 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20985 match(Set dst (MulVL src1 src2));
20986 ins_cost(100);
20987 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20988 ins_encode %{
20989 int vlen_enc = vector_length_encoding(this);
20990 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20991 %}
20992 ins_pipe( pipe_slow );
20993 %}
20994
20995 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20996 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20997 match(Set dst (MulVL src1 src2));
20998 ins_cost(100);
20999 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
21000 ins_encode %{
21001 int vlen_enc = vector_length_encoding(this);
21002 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21003 %}
21004 ins_pipe( pipe_slow );
21005 %}
21006
21007 // Floats vector mul
21008 instruct vmulF(vec dst, vec src) %{
21009 predicate(UseAVX == 0);
21010 match(Set dst (MulVF dst src));
21011 format %{ "mulps $dst,$src\t! mul packedF" %}
21012 ins_encode %{
21013 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
21014 %}
21015 ins_pipe( pipe_slow );
21016 %}
21017
21018 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
21019 predicate(UseAVX > 0);
21020 match(Set dst (MulVF src1 src2));
21021 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
21022 ins_encode %{
21023 int vlen_enc = vector_length_encoding(this);
21024 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21025 %}
21026 ins_pipe( pipe_slow );
21027 %}
21028
21029 instruct vmulF_mem(vec dst, vec src, memory mem) %{
21030 predicate((UseAVX > 0) &&
21031 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21032 match(Set dst (MulVF src (LoadVector mem)));
21033 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
21034 ins_encode %{
21035 int vlen_enc = vector_length_encoding(this);
21036 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21037 %}
21038 ins_pipe( pipe_slow );
21039 %}
21040
21041 // Doubles vector mul
21042 instruct vmulD(vec dst, vec src) %{
21043 predicate(UseAVX == 0);
21044 match(Set dst (MulVD dst src));
21045 format %{ "mulpd $dst,$src\t! mul packedD" %}
21046 ins_encode %{
21047 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
21048 %}
21049 ins_pipe( pipe_slow );
21050 %}
21051
21052 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
21053 predicate(UseAVX > 0);
21054 match(Set dst (MulVD src1 src2));
21055 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
21056 ins_encode %{
21057 int vlen_enc = vector_length_encoding(this);
21058 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21059 %}
21060 ins_pipe( pipe_slow );
21061 %}
21062
21063 instruct vmulD_mem(vec dst, vec src, memory mem) %{
21064 predicate((UseAVX > 0) &&
21065 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21066 match(Set dst (MulVD src (LoadVector mem)));
21067 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
21068 ins_encode %{
21069 int vlen_enc = vector_length_encoding(this);
21070 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21071 %}
21072 ins_pipe( pipe_slow );
21073 %}
21074
21075 // --------------------------------- DIV --------------------------------------
21076
21077 // Floats vector div
21078 instruct vdivF(vec dst, vec src) %{
21079 predicate(UseAVX == 0);
21080 match(Set dst (DivVF dst src));
21081 format %{ "divps $dst,$src\t! div packedF" %}
21082 ins_encode %{
21083 __ divps($dst$$XMMRegister, $src$$XMMRegister);
21084 %}
21085 ins_pipe( pipe_slow );
21086 %}
21087
21088 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
21089 predicate(UseAVX > 0);
21090 match(Set dst (DivVF src1 src2));
21091 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
21092 ins_encode %{
21093 int vlen_enc = vector_length_encoding(this);
21094 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21095 %}
21096 ins_pipe( pipe_slow );
21097 %}
21098
21099 instruct vdivF_mem(vec dst, vec src, memory mem) %{
21100 predicate((UseAVX > 0) &&
21101 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21102 match(Set dst (DivVF src (LoadVector mem)));
21103 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
21104 ins_encode %{
21105 int vlen_enc = vector_length_encoding(this);
21106 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21107 %}
21108 ins_pipe( pipe_slow );
21109 %}
21110
21111 // Doubles vector div
21112 instruct vdivD(vec dst, vec src) %{
21113 predicate(UseAVX == 0);
21114 match(Set dst (DivVD dst src));
21115 format %{ "divpd $dst,$src\t! div packedD" %}
21116 ins_encode %{
21117 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
21118 %}
21119 ins_pipe( pipe_slow );
21120 %}
21121
21122 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
21123 predicate(UseAVX > 0);
21124 match(Set dst (DivVD src1 src2));
21125 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
21126 ins_encode %{
21127 int vlen_enc = vector_length_encoding(this);
21128 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21129 %}
21130 ins_pipe( pipe_slow );
21131 %}
21132
21133 instruct vdivD_mem(vec dst, vec src, memory mem) %{
21134 predicate((UseAVX > 0) &&
21135 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21136 match(Set dst (DivVD src (LoadVector mem)));
21137 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
21138 ins_encode %{
21139 int vlen_enc = vector_length_encoding(this);
21140 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21141 %}
21142 ins_pipe( pipe_slow );
21143 %}
21144
21145 // ------------------------------ MinMax ---------------------------------------
21146
21147 // Byte, Short, Int vector Min/Max
21148 instruct minmax_reg_sse(vec dst, vec src) %{
21149 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21150 UseAVX == 0);
21151 match(Set dst (MinV dst src));
21152 match(Set dst (MaxV dst src));
21153 format %{ "vector_minmax $dst,$src\t! " %}
21154 ins_encode %{
21155 assert(UseSSE >= 4, "required");
21156
21157 int opcode = this->ideal_Opcode();
21158 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21159 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21160 %}
21161 ins_pipe( pipe_slow );
21162 %}
21163
21164 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21165 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21166 UseAVX > 0);
21167 match(Set dst (MinV src1 src2));
21168 match(Set dst (MaxV src1 src2));
21169 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
21170 ins_encode %{
21171 int opcode = this->ideal_Opcode();
21172 int vlen_enc = vector_length_encoding(this);
21173 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21174
21175 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21176 %}
21177 ins_pipe( pipe_slow );
21178 %}
21179
21180 // Long vector Min/Max
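// SSE4.1 has no packed 64-bit min/max, so the SSE rule below compares and blends
// instead; the non-VEX blend instructions take xmm0 as an implicit mask, which is
// presumably why the temporary is pinned to rxmm0.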
21181 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21182 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21183 UseAVX == 0);
21184 match(Set dst (MinV dst src));
21185 match(Set dst (MaxV src dst));
21186 effect(TEMP dst, TEMP tmp);
21187 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
21188 ins_encode %{
21189 assert(UseSSE >= 4, "required");
21190
21191 int opcode = this->ideal_Opcode();
21192 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21193 assert(elem_bt == T_LONG, "sanity");
21194
21195 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21196 %}
21197 ins_pipe( pipe_slow );
21198 %}
21199
21200 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21201 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21202 UseAVX > 0 && !VM_Version::supports_avx512vl());
21203 match(Set dst (MinV src1 src2));
21204 match(Set dst (MaxV src1 src2));
21205 effect(TEMP dst);
21206 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
21207 ins_encode %{
21208 int vlen_enc = vector_length_encoding(this);
21209 int opcode = this->ideal_Opcode();
21210 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21211 assert(elem_bt == T_LONG, "sanity");
21212
21213 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21214 %}
21215 ins_pipe( pipe_slow );
21216 %}
21217
21218 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21219 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21220 Matcher::vector_element_basic_type(n) == T_LONG);
21221 match(Set dst (MinV src1 src2));
21222 match(Set dst (MaxV src1 src2));
21223 format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
21224 ins_encode %{
21225 assert(UseAVX > 2, "required");
21226
21227 int vlen_enc = vector_length_encoding(this);
21228 int opcode = this->ideal_Opcode();
21229 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21230 assert(elem_bt == T_LONG, "sanity");
21231
21232 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21233 %}
21234 ins_pipe( pipe_slow );
21235 %}
21236
21237 // Float/Double vector Min/Max
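// Java Math.min/max semantics (NaN propagation and -0.0 < +0.0) are not what plain
// vminps/vmaxps deliver, so the pre-AVX10.2 rules below synthesize the result with
// compares and blends and need several temporaries; the AVX10.2 rule relies on the
// new minmax instructions and gets by without them.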
21238 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21239 predicate(VM_Version::supports_avx10_2() &&
21240 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21241 match(Set dst (MinV a b));
21242 match(Set dst (MaxV a b));
21243 format %{ "vector_minmaxFP $dst, $a, $b" %}
21244 ins_encode %{
21245 int vlen_enc = vector_length_encoding(this);
21246 int opcode = this->ideal_Opcode();
21247 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21248 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21249 %}
21250 ins_pipe( pipe_slow );
21251 %}
21252
21253 // Float/Double vector Min/Max
21254 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21255 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21256 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21257 UseAVX > 0);
21258 match(Set dst (MinV a b));
21259 match(Set dst (MaxV a b));
21260 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21261 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21262 ins_encode %{
21263 assert(UseAVX > 0, "required");
21264
21265 int opcode = this->ideal_Opcode();
21266 int vlen_enc = vector_length_encoding(this);
21267 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21268
21269 __ vminmax_fp(opcode, elem_bt,
21270 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21271 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21272 %}
21273 ins_pipe( pipe_slow );
21274 %}
21275
21276 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21277 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21278 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21279 match(Set dst (MinV a b));
21280 match(Set dst (MaxV a b));
21281 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21282 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21283 ins_encode %{
21284 assert(UseAVX > 2, "required");
21285
21286 int opcode = this->ideal_Opcode();
21287 int vlen_enc = vector_length_encoding(this);
21288 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21289
21290 __ evminmax_fp(opcode, elem_bt,
21291 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21292 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21293 %}
21294 ins_pipe( pipe_slow );
21295 %}
21296
21297 // ------------------------------ Unsigned vector Min/Max ----------------------
21298
21299 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21300 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21301 match(Set dst (UMinV a b));
21302 match(Set dst (UMaxV a b));
21303 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21304 ins_encode %{
21305 int opcode = this->ideal_Opcode();
21306 int vlen_enc = vector_length_encoding(this);
21307 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21308 assert(is_integral_type(elem_bt), "");
21309 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21310 %}
21311 ins_pipe( pipe_slow );
21312 %}
21313
21314 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21315 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21316 match(Set dst (UMinV a (LoadVector b)));
21317 match(Set dst (UMaxV a (LoadVector b)));
21318 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21319 ins_encode %{
21320 int opcode = this->ideal_Opcode();
21321 int vlen_enc = vector_length_encoding(this);
21322 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21323 assert(is_integral_type(elem_bt), "");
21324 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21325 %}
21326 ins_pipe( pipe_slow );
21327 %}
21328
21329 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21330 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21331 match(Set dst (UMinV a b));
21332 match(Set dst (UMaxV a b));
21333 effect(TEMP xtmp1, TEMP xtmp2);
21334 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21335 ins_encode %{
21336 int opcode = this->ideal_Opcode();
21337 int vlen_enc = vector_length_encoding(this);
21338 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21339 %}
21340 ins_pipe( pipe_slow );
21341 %}
21342
21343 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21344 match(Set dst (UMinV (Binary dst src2) mask));
21345 match(Set dst (UMaxV (Binary dst src2) mask));
21346 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21347 ins_encode %{
21348 int vlen_enc = vector_length_encoding(this);
21349 BasicType bt = Matcher::vector_element_basic_type(this);
21350 int opc = this->ideal_Opcode();
21351 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21352 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21353 %}
21354 ins_pipe( pipe_slow );
21355 %}
21356
21357 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21358 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21359 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21360 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21361 ins_encode %{
21362 int vlen_enc = vector_length_encoding(this);
21363 BasicType bt = Matcher::vector_element_basic_type(this);
21364 int opc = this->ideal_Opcode();
21365 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21366 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21367 %}
21368 ins_pipe( pipe_slow );
21369 %}
21370
21371 // --------------------------------- Signum/CopySign ---------------------------
21372
21373 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21374 match(Set dst (SignumF dst (Binary zero one)));
21375 effect(KILL cr);
21376 format %{ "signumF $dst, $dst" %}
21377 ins_encode %{
21378 int opcode = this->ideal_Opcode();
21379 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21380 %}
21381 ins_pipe( pipe_slow );
21382 %}
21383
21384 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21385 match(Set dst (SignumD dst (Binary zero one)));
21386 effect(KILL cr);
21387 format %{ "signumD $dst, $dst" %}
21388 ins_encode %{
21389 int opcode = this->ideal_Opcode();
21390 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21391 %}
21392 ins_pipe( pipe_slow );
21393 %}
21394
21395 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21396 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21397 match(Set dst (SignumVF src (Binary zero one)));
21398 match(Set dst (SignumVD src (Binary zero one)));
21399 effect(TEMP dst, TEMP xtmp1);
21400 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21401 ins_encode %{
21402 int opcode = this->ideal_Opcode();
21403 int vec_enc = vector_length_encoding(this);
21404 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21405 $xtmp1$$XMMRegister, vec_enc);
21406 %}
21407 ins_pipe( pipe_slow );
21408 %}
21409
21410 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21411 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21412 match(Set dst (SignumVF src (Binary zero one)));
21413 match(Set dst (SignumVD src (Binary zero one)));
21414 effect(TEMP dst, TEMP ktmp1);
21415 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21416 ins_encode %{
21417 int opcode = this->ideal_Opcode();
21418 int vec_enc = vector_length_encoding(this);
21419 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21420 $ktmp1$$KRegister, vec_enc);
21421 %}
21422 ins_pipe( pipe_slow );
21423 %}
21424
21425 // ---------------------------------------
21426 // For copySign use 0xE4 as writemask for vpternlog
21427 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21428 // C (xmm2) is set to 0x7FFFFFFF
21429 // Wherever xmm2 is 0, we want to pick from B (sign)
21430 // Wherever xmm2 is 1, we want to pick from A (src)
21431 //
21432 // A B C Result
21433 // 0 0 0 0
21434 // 0 0 1 0
21435 // 0 1 0 1
21436 // 0 1 1 0
21437 // 1 0 0 0
21438 // 1 0 1 1
21439 // 1 1 0 1
21440 // 1 1 1 1
21441 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
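//
// Worked example (illustrative only): vpternlog builds the 3-bit index
// (A << 2) | (B << 1) | C for each bit position and outputs the imm8 bit at
// that index. With imm8 = 0xE4 = 0b11100100, positions where C = 0 (the sign
// bit) select B, and positions where C = 1 (the magnitude bits) select A,
// e.g. A=1, B=0, C=1 -> index 5 -> (0xE4 >> 5) & 1 = 1 = A.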
21443 // ---------------------------------------
21444
21445 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21446 match(Set dst (CopySignF dst src));
21447 effect(TEMP tmp1, TEMP tmp2);
21448 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21449 ins_encode %{
21450 __ movl($tmp2$$Register, 0x7FFFFFFF);
21451 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21452 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21453 %}
21454 ins_pipe( pipe_slow );
21455 %}
21456
21457 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21458 match(Set dst (CopySignD dst (Binary src zero)));
21459 ins_cost(100);
21460 effect(TEMP tmp1, TEMP tmp2);
21461 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21462 ins_encode %{
21463 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21464 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21465 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21466 %}
21467 ins_pipe( pipe_slow );
21468 %}
21469
21470 //----------------------------- CompressBits/ExpandBits ------------------------
21471
21472 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21473 predicate(n->bottom_type()->isa_int());
21474 match(Set dst (CompressBits src mask));
21475 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21476 ins_encode %{
21477 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21478 %}
21479 ins_pipe( pipe_slow );
21480 %}
21481
21482 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21483 predicate(n->bottom_type()->isa_int());
21484 match(Set dst (ExpandBits src mask));
21485 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21486 ins_encode %{
21487 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21488 %}
21489 ins_pipe( pipe_slow );
21490 %}
21491
21492 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21493 predicate(n->bottom_type()->isa_int());
21494 match(Set dst (CompressBits src (LoadI mask)));
21495 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21496 ins_encode %{
21497 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21498 %}
21499 ins_pipe( pipe_slow );
21500 %}
21501
21502 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21503 predicate(n->bottom_type()->isa_int());
21504 match(Set dst (ExpandBits src (LoadI mask)));
21505 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21506 ins_encode %{
21507 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21508 %}
21509 ins_pipe( pipe_slow );
21510 %}
21511
21512 // --------------------------------- Sqrt --------------------------------------
21513
21514 instruct vsqrtF_reg(vec dst, vec src) %{
21515 match(Set dst (SqrtVF src));
21516 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21517 ins_encode %{
21518 assert(UseAVX > 0, "required");
21519 int vlen_enc = vector_length_encoding(this);
21520 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21521 %}
21522 ins_pipe( pipe_slow );
21523 %}
21524
21525 instruct vsqrtF_mem(vec dst, memory mem) %{
21526 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21527 match(Set dst (SqrtVF (LoadVector mem)));
21528 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21529 ins_encode %{
21530 assert(UseAVX > 0, "required");
21531 int vlen_enc = vector_length_encoding(this);
21532 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21533 %}
21534 ins_pipe( pipe_slow );
21535 %}
21536
21537 // Floating point vector sqrt
21538 instruct vsqrtD_reg(vec dst, vec src) %{
21539 match(Set dst (SqrtVD src));
21540 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21541 ins_encode %{
21542 assert(UseAVX > 0, "required");
21543 int vlen_enc = vector_length_encoding(this);
21544 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21545 %}
21546 ins_pipe( pipe_slow );
21547 %}
21548
21549 instruct vsqrtD_mem(vec dst, memory mem) %{
21550 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21551 match(Set dst (SqrtVD (LoadVector mem)));
21552 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21553 ins_encode %{
21554 assert(UseAVX > 0, "required");
21555 int vlen_enc = vector_length_encoding(this);
21556 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21557 %}
21558 ins_pipe( pipe_slow );
21559 %}
21560
21561 // ------------------------------ Shift ---------------------------------------
21562
21563 // Left and right shift count vectors are the same on x86
21564 // (only lowest bits of xmm reg are used for count).
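// For example, psllw and psrlw both read the shift count from the low 64 bits
// of the count operand, so the single movdl below serves both LShiftCntV and
// RShiftCntV.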
21565 instruct vshiftcnt(vec dst, rRegI cnt) %{
21566 match(Set dst (LShiftCntV cnt));
21567 match(Set dst (RShiftCntV cnt));
21568 format %{ "movdl $dst,$cnt\t! load shift count" %}
21569 ins_encode %{
21570 __ movdl($dst$$XMMRegister, $cnt$$Register);
21571 %}
21572 ins_pipe( pipe_slow );
21573 %}
21574
21575 // Byte vector shift
21576 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21577 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21578 match(Set dst ( LShiftVB src shift));
21579 match(Set dst ( RShiftVB src shift));
21580 match(Set dst (URShiftVB src shift));
21581 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21582 format %{"vector_byte_shift $dst,$src,$shift" %}
21583 ins_encode %{
21584 assert(UseSSE > 3, "required");
21585 int opcode = this->ideal_Opcode();
21586 bool sign = (opcode != Op_URShiftVB);
    // Widen the bytes to words, shift as words, then mask off the high byte of
    // each word and narrow back to bytes.
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21592 %}
21593 ins_pipe( pipe_slow );
21594 %}
21595
21596 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21597 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21598 UseAVX <= 1);
21599 match(Set dst ( LShiftVB src shift));
21600 match(Set dst ( RShiftVB src shift));
21601 match(Set dst (URShiftVB src shift));
21602 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21603 format %{"vector_byte_shift $dst,$src,$shift" %}
21604 ins_encode %{
21605 assert(UseSSE > 3, "required");
21606 int opcode = this->ideal_Opcode();
21607 bool sign = (opcode != Op_URShiftVB);
21608 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21609 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21610 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21611 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21612 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21613 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21614 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21615 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21616 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21617 %}
21618 ins_pipe( pipe_slow );
21619 %}
21620
21621 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21622 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21623 UseAVX > 1);
21624 match(Set dst ( LShiftVB src shift));
21625 match(Set dst ( RShiftVB src shift));
21626 match(Set dst (URShiftVB src shift));
21627 effect(TEMP dst, TEMP tmp);
21628 format %{"vector_byte_shift $dst,$src,$shift" %}
21629 ins_encode %{
21630 int opcode = this->ideal_Opcode();
21631 bool sign = (opcode != Op_URShiftVB);
21632 int vlen_enc = Assembler::AVX_256bit;
21633 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21634 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21635 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21636 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21637 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21638 %}
21639 ins_pipe( pipe_slow );
21640 %}
21641
21642 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21643 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21644 match(Set dst ( LShiftVB src shift));
21645 match(Set dst ( RShiftVB src shift));
21646 match(Set dst (URShiftVB src shift));
21647 effect(TEMP dst, TEMP tmp);
21648 format %{"vector_byte_shift $dst,$src,$shift" %}
21649 ins_encode %{
21650 assert(UseAVX > 1, "required");
21651 int opcode = this->ideal_Opcode();
21652 bool sign = (opcode != Op_URShiftVB);
21653 int vlen_enc = Assembler::AVX_256bit;
21654 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21655 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21656 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21657 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21658 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21659 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21660 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21661 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21662 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21663 %}
21664 ins_pipe( pipe_slow );
21665 %}
21666
21667 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21668 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21669 match(Set dst ( LShiftVB src shift));
21670 match(Set dst (RShiftVB src shift));
21671 match(Set dst (URShiftVB src shift));
21672 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21673 format %{"vector_byte_shift $dst,$src,$shift" %}
21674 ins_encode %{
21675 assert(UseAVX > 2, "required");
21676 int opcode = this->ideal_Opcode();
21677 bool sign = (opcode != Op_URShiftVB);
21678 int vlen_enc = Assembler::AVX_512bit;
21679 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21680 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21681 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21682 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21683 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21684 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21685 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21686 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21687 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21688 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21689 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21690 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21691 %}
21692 ins_pipe( pipe_slow );
21693 %}
21694
// A short vector logical right shift would produce an incorrect Java result
// for negative data, because Java converts the short value to an int with
// sign extension before shifting. Char vectors are fine since chars are
// unsigned values.
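// Example of the mismatch (illustrative values): for short s = (short)0x8000,
// Java evaluates s >>> 3 as ((int)s) >>> 3 = 0xFFFF8000 >>> 3 = 0x1FFFF000,
// whose low 16 bits are 0xF000, while a pure 16-bit logical shift would give
// 0x1000. A char with the same bit pattern zero-extends to 0x00008000, so both
// shifts agree there.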
// Shorts/Chars vector shift
21700 instruct vshiftS(vec dst, vec src, vec shift) %{
21701 predicate(!n->as_ShiftV()->is_var_shift());
21702 match(Set dst ( LShiftVS src shift));
21703 match(Set dst ( RShiftVS src shift));
21704 match(Set dst (URShiftVS src shift));
21705 effect(TEMP dst, USE src, USE shift);
21706 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21707 ins_encode %{
21708 int opcode = this->ideal_Opcode();
21709 if (UseAVX > 0) {
21710 int vlen_enc = vector_length_encoding(this);
21711 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21712 } else {
21713 int vlen = Matcher::vector_length(this);
21714 if (vlen == 2) {
21715 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21716 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21717 } else if (vlen == 4) {
21718 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21719 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21720 } else {
21721 assert (vlen == 8, "sanity");
21722 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21723 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21724 }
21725 }
21726 %}
21727 ins_pipe( pipe_slow );
21728 %}
21729
21730 // Integers vector left shift
21731 instruct vshiftI(vec dst, vec src, vec shift) %{
21732 predicate(!n->as_ShiftV()->is_var_shift());
21733 match(Set dst ( LShiftVI src shift));
21734 match(Set dst ( RShiftVI src shift));
21735 match(Set dst (URShiftVI src shift));
21736 effect(TEMP dst, USE src, USE shift);
21737 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21738 ins_encode %{
21739 int opcode = this->ideal_Opcode();
21740 if (UseAVX > 0) {
21741 int vlen_enc = vector_length_encoding(this);
21742 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21743 } else {
21744 int vlen = Matcher::vector_length(this);
21745 if (vlen == 2) {
21746 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21747 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21748 } else {
21749 assert(vlen == 4, "sanity");
21750 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21751 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21752 }
21753 }
21754 %}
21755 ins_pipe( pipe_slow );
21756 %}
21757
21758 // Integers vector left constant shift
21759 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21760 match(Set dst (LShiftVI src (LShiftCntV shift)));
21761 match(Set dst (RShiftVI src (RShiftCntV shift)));
21762 match(Set dst (URShiftVI src (RShiftCntV shift)));
21763 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21764 ins_encode %{
21765 int opcode = this->ideal_Opcode();
21766 if (UseAVX > 0) {
21767 int vector_len = vector_length_encoding(this);
21768 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21769 } else {
21770 int vlen = Matcher::vector_length(this);
21771 if (vlen == 2) {
21772 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21773 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21774 } else {
21775 assert(vlen == 4, "sanity");
21776 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21777 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21778 }
21779 }
21780 %}
21781 ins_pipe( pipe_slow );
21782 %}
21783
21784 // Longs vector shift
21785 instruct vshiftL(vec dst, vec src, vec shift) %{
21786 predicate(!n->as_ShiftV()->is_var_shift());
21787 match(Set dst ( LShiftVL src shift));
21788 match(Set dst (URShiftVL src shift));
21789 effect(TEMP dst, USE src, USE shift);
21790 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21791 ins_encode %{
21792 int opcode = this->ideal_Opcode();
21793 if (UseAVX > 0) {
21794 int vlen_enc = vector_length_encoding(this);
21795 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21796 } else {
21797 assert(Matcher::vector_length(this) == 2, "");
21798 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21799 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21800 }
21801 %}
21802 ins_pipe( pipe_slow );
21803 %}
21804
21805 // Longs vector constant shift
21806 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21807 match(Set dst (LShiftVL src (LShiftCntV shift)));
21808 match(Set dst (URShiftVL src (RShiftCntV shift)));
21809 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21810 ins_encode %{
21811 int opcode = this->ideal_Opcode();
21812 if (UseAVX > 0) {
21813 int vector_len = vector_length_encoding(this);
21814 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21815 } else {
21816 assert(Matcher::vector_length(this) == 2, "");
21817 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21818 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21819 }
21820 %}
21821 ins_pipe( pipe_slow );
21822 %}
21823
21824 // -------------------ArithmeticRightShift -----------------------------------
21825 // Long vector arithmetic right shift
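// SSE/AVX2 have no packed 64-bit arithmetic right shift, so the rule below
// emulates it. Sketch of the identity used (assuming vector_long_sign_mask is
// 0x8000000000000000 in every lane): with m = sign_mask >>> n,
//   (x >> n)  ==  ((x >>> n) ^ m) - m
// i.e. shift logically, then xor and subtract the shifted sign mask to
// re-extend the sign bit.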
21826 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21827 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21828 match(Set dst (RShiftVL src shift));
21829 effect(TEMP dst, TEMP tmp);
21830 format %{ "vshiftq $dst,$src,$shift" %}
21831 ins_encode %{
21832 uint vlen = Matcher::vector_length(this);
21833 if (vlen == 2) {
21834 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21835 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21836 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21837 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21838 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21839 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21840 } else {
21841 assert(vlen == 4, "sanity");
21842 assert(UseAVX > 1, "required");
21843 int vlen_enc = Assembler::AVX_256bit;
21844 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21845 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21846 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21847 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21848 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21849 }
21850 %}
21851 ins_pipe( pipe_slow );
21852 %}
21853
21854 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21855 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21856 match(Set dst (RShiftVL src shift));
21857 format %{ "vshiftq $dst,$src,$shift" %}
21858 ins_encode %{
21859 int vlen_enc = vector_length_encoding(this);
21860 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21861 %}
21862 ins_pipe( pipe_slow );
21863 %}
21864
21865 // ------------------- Variable Shift -----------------------------
21866 // Byte variable shift
21867 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21868 predicate(Matcher::vector_length(n) <= 8 &&
21869 n->as_ShiftV()->is_var_shift() &&
21870 !VM_Version::supports_avx512bw());
21871 match(Set dst ( LShiftVB src shift));
21872 match(Set dst ( RShiftVB src shift));
21873 match(Set dst (URShiftVB src shift));
21874 effect(TEMP dst, TEMP vtmp);
21875 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21876 ins_encode %{
21877 assert(UseAVX >= 2, "required");
21878
21879 int opcode = this->ideal_Opcode();
21880 int vlen_enc = Assembler::AVX_128bit;
21881 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21882 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21883 %}
21884 ins_pipe( pipe_slow );
21885 %}
21886
21887 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21888 predicate(Matcher::vector_length(n) == 16 &&
21889 n->as_ShiftV()->is_var_shift() &&
21890 !VM_Version::supports_avx512bw());
21891 match(Set dst ( LShiftVB src shift));
21892 match(Set dst ( RShiftVB src shift));
21893 match(Set dst (URShiftVB src shift));
21894 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21895 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21896 ins_encode %{
21897 assert(UseAVX >= 2, "required");
21898
21899 int opcode = this->ideal_Opcode();
21900 int vlen_enc = Assembler::AVX_128bit;
21901 // Shift lower half and get word result in dst
21902 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21903
21904 // Shift upper half and get word result in vtmp1
21905 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21906 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21907 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21908
21909 // Merge and down convert the two word results to byte in dst
21910 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21911 %}
21912 ins_pipe( pipe_slow );
21913 %}
21914
21915 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21916 predicate(Matcher::vector_length(n) == 32 &&
21917 n->as_ShiftV()->is_var_shift() &&
21918 !VM_Version::supports_avx512bw());
21919 match(Set dst ( LShiftVB src shift));
21920 match(Set dst ( RShiftVB src shift));
21921 match(Set dst (URShiftVB src shift));
21922 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21924 ins_encode %{
21925 assert(UseAVX >= 2, "required");
21926
21927 int opcode = this->ideal_Opcode();
21928 int vlen_enc = Assembler::AVX_128bit;
21929 // Process lower 128 bits and get result in dst
21930 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21931 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21932 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21933 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21934 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21935
21936 // Process higher 128 bits and get result in vtmp3
21937 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21938 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21939 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21940 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21941 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21942 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21943 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21944
21945 // Merge the two results in dst
21946 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21947 %}
21948 ins_pipe( pipe_slow );
21949 %}
21950
21951 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21952 predicate(Matcher::vector_length(n) <= 32 &&
21953 n->as_ShiftV()->is_var_shift() &&
21954 VM_Version::supports_avx512bw());
21955 match(Set dst ( LShiftVB src shift));
21956 match(Set dst ( RShiftVB src shift));
21957 match(Set dst (URShiftVB src shift));
21958 effect(TEMP dst, TEMP vtmp);
21959 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21960 ins_encode %{
21961 assert(UseAVX > 2, "required");
21962
21963 int opcode = this->ideal_Opcode();
21964 int vlen_enc = vector_length_encoding(this);
21965 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21966 %}
21967 ins_pipe( pipe_slow );
21968 %}
21969
21970 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21971 predicate(Matcher::vector_length(n) == 64 &&
21972 n->as_ShiftV()->is_var_shift() &&
21973 VM_Version::supports_avx512bw());
21974 match(Set dst ( LShiftVB src shift));
21975 match(Set dst ( RShiftVB src shift));
21976 match(Set dst (URShiftVB src shift));
21977 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21978 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21979 ins_encode %{
21980 assert(UseAVX > 2, "required");
21981
21982 int opcode = this->ideal_Opcode();
21983 int vlen_enc = Assembler::AVX_256bit;
21984 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21985 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21986 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21987 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21988 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21989 %}
21990 ins_pipe( pipe_slow );
21991 %}
21992
21993 // Short variable shift
21994 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21995 predicate(Matcher::vector_length(n) <= 8 &&
21996 n->as_ShiftV()->is_var_shift() &&
21997 !VM_Version::supports_avx512bw());
21998 match(Set dst ( LShiftVS src shift));
21999 match(Set dst ( RShiftVS src shift));
22000 match(Set dst (URShiftVS src shift));
22001 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
22003 ins_encode %{
22004 assert(UseAVX >= 2, "required");
22005
22006 int opcode = this->ideal_Opcode();
22007 bool sign = (opcode != Op_URShiftVS);
22008 int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22011 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22012 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22013 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
22014 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22015 %}
22016 ins_pipe( pipe_slow );
22017 %}
22018
22019 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22020 predicate(Matcher::vector_length(n) == 16 &&
22021 n->as_ShiftV()->is_var_shift() &&
22022 !VM_Version::supports_avx512bw());
22023 match(Set dst ( LShiftVS src shift));
22024 match(Set dst ( RShiftVS src shift));
22025 match(Set dst (URShiftVS src shift));
22026 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
22028 ins_encode %{
22029 assert(UseAVX >= 2, "required");
22030
22031 int opcode = this->ideal_Opcode();
22032 bool sign = (opcode != Op_URShiftVS);
22033 int vlen_enc = Assembler::AVX_256bit;
22034 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
22035 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
22036 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22037 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22038 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22039
22040 // Shift upper half, with result in dst using vtmp1 as TEMP
22041 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
22042 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
22043 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22044 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22045 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22046 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22047
22048 // Merge lower and upper half result into dst
22049 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22050 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
22051 %}
22052 ins_pipe( pipe_slow );
22053 %}
22054
22055 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
22056 predicate(n->as_ShiftV()->is_var_shift() &&
22057 VM_Version::supports_avx512bw());
22058 match(Set dst ( LShiftVS src shift));
22059 match(Set dst ( RShiftVS src shift));
22060 match(Set dst (URShiftVS src shift));
22061 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
22062 ins_encode %{
22063 assert(UseAVX > 2, "required");
22064
22065 int opcode = this->ideal_Opcode();
22066 int vlen_enc = vector_length_encoding(this);
22067 if (!VM_Version::supports_avx512vl()) {
22068 vlen_enc = Assembler::AVX_512bit;
22069 }
22070 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22071 %}
22072 ins_pipe( pipe_slow );
22073 %}
22074
// Integer variable shift
22076 instruct vshiftI_var(vec dst, vec src, vec shift) %{
22077 predicate(n->as_ShiftV()->is_var_shift());
22078 match(Set dst ( LShiftVI src shift));
22079 match(Set dst ( RShiftVI src shift));
22080 match(Set dst (URShiftVI src shift));
22081 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
22082 ins_encode %{
22083 assert(UseAVX >= 2, "required");
22084
22085 int opcode = this->ideal_Opcode();
22086 int vlen_enc = vector_length_encoding(this);
22087 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22088 %}
22089 ins_pipe( pipe_slow );
22090 %}
22091
// Long variable shift
22093 instruct vshiftL_var(vec dst, vec src, vec shift) %{
22094 predicate(n->as_ShiftV()->is_var_shift());
22095 match(Set dst ( LShiftVL src shift));
22096 match(Set dst (URShiftVL src shift));
22097 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22098 ins_encode %{
22099 assert(UseAVX >= 2, "required");
22100
22101 int opcode = this->ideal_Opcode();
22102 int vlen_enc = vector_length_encoding(this);
22103 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22104 %}
22105 ins_pipe( pipe_slow );
22106 %}
22107
// Long variable right shift arithmetic
22109 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
22110 predicate(Matcher::vector_length(n) <= 4 &&
22111 n->as_ShiftV()->is_var_shift() &&
22112 UseAVX == 2);
22113 match(Set dst (RShiftVL src shift));
22114 effect(TEMP dst, TEMP vtmp);
22115 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
22116 ins_encode %{
22117 int opcode = this->ideal_Opcode();
22118 int vlen_enc = vector_length_encoding(this);
22119 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
22120 $vtmp$$XMMRegister);
22121 %}
22122 ins_pipe( pipe_slow );
22123 %}
22124
22125 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
22126 predicate(n->as_ShiftV()->is_var_shift() &&
22127 UseAVX > 2);
22128 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22130 ins_encode %{
22131 int opcode = this->ideal_Opcode();
22132 int vlen_enc = vector_length_encoding(this);
22133 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22134 %}
22135 ins_pipe( pipe_slow );
22136 %}
22137
22138 // --------------------------------- AND --------------------------------------
22139
22140 instruct vand(vec dst, vec src) %{
22141 predicate(UseAVX == 0);
22142 match(Set dst (AndV dst src));
22143 format %{ "pand $dst,$src\t! and vectors" %}
22144 ins_encode %{
22145 __ pand($dst$$XMMRegister, $src$$XMMRegister);
22146 %}
22147 ins_pipe( pipe_slow );
22148 %}
22149
22150 instruct vand_reg(vec dst, vec src1, vec src2) %{
22151 predicate(UseAVX > 0);
22152 match(Set dst (AndV src1 src2));
22153 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
22154 ins_encode %{
22155 int vlen_enc = vector_length_encoding(this);
22156 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22157 %}
22158 ins_pipe( pipe_slow );
22159 %}
22160
22161 instruct vand_mem(vec dst, vec src, memory mem) %{
22162 predicate((UseAVX > 0) &&
22163 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22164 match(Set dst (AndV src (LoadVector mem)));
22165 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
22166 ins_encode %{
22167 int vlen_enc = vector_length_encoding(this);
22168 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22169 %}
22170 ins_pipe( pipe_slow );
22171 %}
22172
22173 // --------------------------------- OR ---------------------------------------
22174
22175 instruct vor(vec dst, vec src) %{
22176 predicate(UseAVX == 0);
22177 match(Set dst (OrV dst src));
22178 format %{ "por $dst,$src\t! or vectors" %}
22179 ins_encode %{
22180 __ por($dst$$XMMRegister, $src$$XMMRegister);
22181 %}
22182 ins_pipe( pipe_slow );
22183 %}
22184
22185 instruct vor_reg(vec dst, vec src1, vec src2) %{
22186 predicate(UseAVX > 0);
22187 match(Set dst (OrV src1 src2));
22188 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
22189 ins_encode %{
22190 int vlen_enc = vector_length_encoding(this);
22191 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22192 %}
22193 ins_pipe( pipe_slow );
22194 %}
22195
22196 instruct vor_mem(vec dst, vec src, memory mem) %{
22197 predicate((UseAVX > 0) &&
22198 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22199 match(Set dst (OrV src (LoadVector mem)));
22200 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
22201 ins_encode %{
22202 int vlen_enc = vector_length_encoding(this);
22203 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22204 %}
22205 ins_pipe( pipe_slow );
22206 %}
22207
22208 // --------------------------------- XOR --------------------------------------
22209
22210 instruct vxor(vec dst, vec src) %{
22211 predicate(UseAVX == 0);
22212 match(Set dst (XorV dst src));
22213 format %{ "pxor $dst,$src\t! xor vectors" %}
22214 ins_encode %{
22215 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22216 %}
22217 ins_pipe( pipe_slow );
22218 %}
22219
22220 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22221 predicate(UseAVX > 0);
22222 match(Set dst (XorV src1 src2));
22223 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
22224 ins_encode %{
22225 int vlen_enc = vector_length_encoding(this);
22226 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22227 %}
22228 ins_pipe( pipe_slow );
22229 %}
22230
22231 instruct vxor_mem(vec dst, vec src, memory mem) %{
22232 predicate((UseAVX > 0) &&
22233 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22234 match(Set dst (XorV src (LoadVector mem)));
22235 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
22236 ins_encode %{
22237 int vlen_enc = vector_length_encoding(this);
22238 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22239 %}
22240 ins_pipe( pipe_slow );
22241 %}
22242
22243 // --------------------------------- VectorCast --------------------------------------
22244
22245 instruct vcastBtoX(vec dst, vec src) %{
22246 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22247 match(Set dst (VectorCastB2X src));
22248 format %{ "vector_cast_b2x $dst,$src\t!" %}
22249 ins_encode %{
22250 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22251 int vlen_enc = vector_length_encoding(this);
22252 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22253 %}
22254 ins_pipe( pipe_slow );
22255 %}
22256
22257 instruct vcastBtoD(legVec dst, legVec src) %{
22258 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22259 match(Set dst (VectorCastB2X src));
22260 format %{ "vector_cast_b2x $dst,$src\t!" %}
22261 ins_encode %{
22262 int vlen_enc = vector_length_encoding(this);
22263 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22264 %}
22265 ins_pipe( pipe_slow );
22266 %}
22267
22268 instruct castStoX(vec dst, vec src) %{
22269 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22270 Matcher::vector_length(n->in(1)) <= 8 && // src
22271 Matcher::vector_element_basic_type(n) == T_BYTE);
22272 match(Set dst (VectorCastS2X src));
22273 format %{ "vector_cast_s2x $dst,$src" %}
22274 ins_encode %{
22275 assert(UseAVX > 0, "required");
22276
22277 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22278 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22279 %}
22280 ins_pipe( pipe_slow );
22281 %}
22282
22283 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22284 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22285 Matcher::vector_length(n->in(1)) == 16 && // src
22286 Matcher::vector_element_basic_type(n) == T_BYTE);
22287 effect(TEMP dst, TEMP vtmp);
22288 match(Set dst (VectorCastS2X src));
22289 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22290 ins_encode %{
22291 assert(UseAVX > 0, "required");
22292
22293 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22294 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22295 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22296 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22297 %}
22298 ins_pipe( pipe_slow );
22299 %}
22300
22301 instruct vcastStoX_evex(vec dst, vec src) %{
22302 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22303 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22304 match(Set dst (VectorCastS2X src));
22305 format %{ "vector_cast_s2x $dst,$src\t!" %}
22306 ins_encode %{
22307 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22308 int src_vlen_enc = vector_length_encoding(this, $src);
22309 int vlen_enc = vector_length_encoding(this);
22310 switch (to_elem_bt) {
22311 case T_BYTE:
22312 if (!VM_Version::supports_avx512vl()) {
22313 vlen_enc = Assembler::AVX_512bit;
22314 }
22315 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22316 break;
22317 case T_INT:
22318 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22319 break;
22320 case T_FLOAT:
22321 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22322 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22323 break;
22324 case T_LONG:
22325 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22326 break;
22327 case T_DOUBLE: {
22328 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22329 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22330 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22331 break;
22332 }
22333 default:
22334 ShouldNotReachHere();
22335 }
22336 %}
22337 ins_pipe( pipe_slow );
22338 %}
22339
22340 instruct castItoX(vec dst, vec src) %{
22341 predicate(UseAVX <= 2 &&
22342 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22343 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22344 match(Set dst (VectorCastI2X src));
22345 format %{ "vector_cast_i2x $dst,$src" %}
22346 ins_encode %{
22347 assert(UseAVX > 0, "required");
22348
22349 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22350 int vlen_enc = vector_length_encoding(this, $src);
22351
22352 if (to_elem_bt == T_BYTE) {
22353 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22354 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22355 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22356 } else {
22357 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22358 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22359 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22360 }
22361 %}
22362 ins_pipe( pipe_slow );
22363 %}
22364
22365 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22366 predicate(UseAVX <= 2 &&
22367 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22368 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22369 match(Set dst (VectorCastI2X src));
22370 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22371 effect(TEMP dst, TEMP vtmp);
22372 ins_encode %{
22373 assert(UseAVX > 0, "required");
22374
22375 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22376 int vlen_enc = vector_length_encoding(this, $src);
22377
22378 if (to_elem_bt == T_BYTE) {
22379 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22380 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22381 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22382 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22383 } else {
22384 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22385 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22386 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22387 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22388 }
22389 %}
22390 ins_pipe( pipe_slow );
22391 %}
22392
22393 instruct vcastItoX_evex(vec dst, vec src) %{
22394 predicate(UseAVX > 2 ||
22395 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22396 match(Set dst (VectorCastI2X src));
22397 format %{ "vector_cast_i2x $dst,$src\t!" %}
22398 ins_encode %{
22399 assert(UseAVX > 0, "required");
22400
22401 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22402 int src_vlen_enc = vector_length_encoding(this, $src);
22403 int dst_vlen_enc = vector_length_encoding(this);
22404 switch (dst_elem_bt) {
22405 case T_BYTE:
22406 if (!VM_Version::supports_avx512vl()) {
22407 src_vlen_enc = Assembler::AVX_512bit;
22408 }
22409 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22410 break;
22411 case T_SHORT:
22412 if (!VM_Version::supports_avx512vl()) {
22413 src_vlen_enc = Assembler::AVX_512bit;
22414 }
22415 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22416 break;
22417 case T_FLOAT:
22418 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22419 break;
22420 case T_LONG:
22421 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22422 break;
22423 case T_DOUBLE:
22424 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22425 break;
22426 default:
22427 ShouldNotReachHere();
22428 }
22429 %}
22430 ins_pipe( pipe_slow );
22431 %}
22432
22433 instruct vcastLtoBS(vec dst, vec src) %{
22434 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22435 UseAVX <= 2);
22436 match(Set dst (VectorCastL2X src));
22437 format %{ "vector_cast_l2x $dst,$src" %}
22438 ins_encode %{
22439 assert(UseAVX > 0, "required");
22440
22441 int vlen = Matcher::vector_length_in_bytes(this, $src);
22442 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22443 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22444 : ExternalAddress(vector_int_to_short_mask());
22445 if (vlen <= 16) {
22446 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22447 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22448 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22449 } else {
22450 assert(vlen <= 32, "required");
22451 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22452 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22453 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22454 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22455 }
22456 if (to_elem_bt == T_BYTE) {
22457 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22458 }
22459 %}
22460 ins_pipe( pipe_slow );
22461 %}
22462
22463 instruct vcastLtoX_evex(vec dst, vec src) %{
22464 predicate(UseAVX > 2 ||
22465 (Matcher::vector_element_basic_type(n) == T_INT ||
22466 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22467 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22468 match(Set dst (VectorCastL2X src));
22469 format %{ "vector_cast_l2x $dst,$src\t!" %}
22470 ins_encode %{
22471 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22472 int vlen = Matcher::vector_length_in_bytes(this, $src);
22473 int vlen_enc = vector_length_encoding(this, $src);
22474 switch (to_elem_bt) {
22475 case T_BYTE:
22476 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22477 vlen_enc = Assembler::AVX_512bit;
22478 }
22479 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22480 break;
22481 case T_SHORT:
22482 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22483 vlen_enc = Assembler::AVX_512bit;
22484 }
22485 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22486 break;
22487 case T_INT:
22488 if (vlen == 8) {
22489 if ($dst$$XMMRegister != $src$$XMMRegister) {
22490 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22491 }
22492 } else if (vlen == 16) {
22493 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22494 } else if (vlen == 32) {
22495 if (UseAVX > 2) {
22496 if (!VM_Version::supports_avx512vl()) {
22497 vlen_enc = Assembler::AVX_512bit;
22498 }
22499 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22500 } else {
22501 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22502 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22503 }
22504 } else { // vlen == 64
22505 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22506 }
22507 break;
22508 case T_FLOAT:
22509 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22510 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22511 break;
22512 case T_DOUBLE:
22513 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22514 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22515 break;
22516
22517 default: assert(false, "%s", type2name(to_elem_bt));
22518 }
22519 %}
22520 ins_pipe( pipe_slow );
22521 %}
22522
22523 instruct vcastFtoD_reg(vec dst, vec src) %{
22524 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22525 match(Set dst (VectorCastF2X src));
22526 format %{ "vector_cast_f2d $dst,$src\t!" %}
22527 ins_encode %{
22528 int vlen_enc = vector_length_encoding(this);
22529 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22530 %}
22531 ins_pipe( pipe_slow );
22532 %}
22533
22535 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22536 predicate(!VM_Version::supports_avx10_2() &&
22537 !VM_Version::supports_avx512vl() &&
22538 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22539 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22540 is_integral_type(Matcher::vector_element_basic_type(n)));
22541 match(Set dst (VectorCastF2X src));
22542 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22543 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22544 ins_encode %{
22545 int vlen_enc = vector_length_encoding(this, $src);
22546 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading addresses
    // wider than 32 bits for the register-indirect addressing mode, since stub constants
    // live in the code cache and ReservedCodeCacheSize is currently capped at 2G.
    // Targets are free to raise this limit, but a code cache larger than 2G is unreasonable
    // in practical scenarios. On the upside, with the given cap we save a temporary register
    // allocation, which in the limiting case can prevent spilling in blocks with high
    // register pressure.
22554 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22555 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22556 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22557 %}
22558 ins_pipe( pipe_slow );
22559 %}
22560
22561 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22562 predicate(!VM_Version::supports_avx10_2() &&
22563 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22564 is_integral_type(Matcher::vector_element_basic_type(n)));
22565 match(Set dst (VectorCastF2X src));
22566 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22567 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22568 ins_encode %{
22569 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22570 if (to_elem_bt == T_LONG) {
22571 int vlen_enc = vector_length_encoding(this);
22572 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22573 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22574 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22575 } else {
22576 int vlen_enc = vector_length_encoding(this, $src);
22577 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22578 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22579 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22580 }
22581 %}
22582 ins_pipe( pipe_slow );
22583 %}
22584
22585 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22586 predicate(VM_Version::supports_avx10_2() &&
22587 is_integral_type(Matcher::vector_element_basic_type(n)));
22588 match(Set dst (VectorCastF2X src));
22589 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22590 ins_encode %{
22591 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22592 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22593 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22594 %}
22595 ins_pipe( pipe_slow );
22596 %}
22597
22598 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22599 predicate(VM_Version::supports_avx10_2() &&
22600 is_integral_type(Matcher::vector_element_basic_type(n)));
22601 match(Set dst (VectorCastF2X (LoadVector src)));
22602 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22603 ins_encode %{
22604 int vlen = Matcher::vector_length(this);
22605 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22606 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22607 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22608 %}
22609 ins_pipe( pipe_slow );
22610 %}
22611
22612 instruct vcastDtoF_reg(vec dst, vec src) %{
22613 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22614 match(Set dst (VectorCastD2X src));
22615 format %{ "vector_cast_d2x $dst,$src\t!" %}
22616 ins_encode %{
22617 int vlen_enc = vector_length_encoding(this, $src);
22618 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22619 %}
22620 ins_pipe( pipe_slow );
22621 %}
22622
22623 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22624 predicate(!VM_Version::supports_avx10_2() &&
22625 !VM_Version::supports_avx512vl() &&
22626 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22627 is_integral_type(Matcher::vector_element_basic_type(n)));
22628 match(Set dst (VectorCastD2X src));
22629 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22630 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22631 ins_encode %{
22632 int vlen_enc = vector_length_encoding(this, $src);
22633 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22634 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22635 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22636 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22637 %}
22638 ins_pipe( pipe_slow );
22639 %}
22640
22641 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22642 predicate(!VM_Version::supports_avx10_2() &&
22643 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22644 is_integral_type(Matcher::vector_element_basic_type(n)));
22645 match(Set dst (VectorCastD2X src));
22646 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22647 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22648 ins_encode %{
22649 int vlen_enc = vector_length_encoding(this, $src);
22650 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22651 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22652 ExternalAddress(vector_float_signflip());
22653 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22654 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22655 %}
22656 ins_pipe( pipe_slow );
22657 %}
22658
22659 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22660 predicate(VM_Version::supports_avx10_2() &&
22661 is_integral_type(Matcher::vector_element_basic_type(n)));
22662 match(Set dst (VectorCastD2X src));
22663 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22664 ins_encode %{
22665 int vlen_enc = vector_length_encoding(this, $src);
22666 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22667 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22668 %}
22669 ins_pipe( pipe_slow );
22670 %}
22671
22672 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22673 predicate(VM_Version::supports_avx10_2() &&
22674 is_integral_type(Matcher::vector_element_basic_type(n)));
22675 match(Set dst (VectorCastD2X (LoadVector src)));
22676 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22677 ins_encode %{
22678 int vlen = Matcher::vector_length(this);
22679 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22680 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22681 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22682 %}
22683 ins_pipe( pipe_slow );
22684 %}
22685
22686 instruct vucast(vec dst, vec src) %{
22687 match(Set dst (VectorUCastB2X src));
22688 match(Set dst (VectorUCastS2X src));
22689 match(Set dst (VectorUCastI2X src));
22690 format %{ "vector_ucast $dst,$src\t!" %}
22691 ins_encode %{
22692 assert(UseAVX > 0, "required");
22693
22694 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22695 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22696 int vlen_enc = vector_length_encoding(this);
22697 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22698 %}
22699 ins_pipe( pipe_slow );
22700 %}
22701
22702 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22703 predicate(!VM_Version::supports_avx512vl() &&
22704 Matcher::vector_length_in_bytes(n) < 64 &&
22705 Matcher::vector_element_basic_type(n) == T_INT);
22706 match(Set dst (RoundVF src));
22707 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22708 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22709 ins_encode %{
22710 int vlen_enc = vector_length_encoding(this);
22711 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
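    // Both constants select round-toward-negative-infinity (MXCSR.RC = 01) with all FP
    // exceptions masked; the EnableX86ECoreOpts value additionally pre-sets the sticky
    // exception flag bits.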
22712 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22713 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22714 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22715 %}
22716 ins_pipe( pipe_slow );
22717 %}
22718
22719 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22720 predicate((VM_Version::supports_avx512vl() ||
22721 Matcher::vector_length_in_bytes(n) == 64) &&
22722 Matcher::vector_element_basic_type(n) == T_INT);
22723 match(Set dst (RoundVF src));
22724 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22725 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22726 ins_encode %{
22727 int vlen_enc = vector_length_encoding(this);
22728 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22729 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22730 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22731 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22732 %}
22733 ins_pipe( pipe_slow );
22734 %}
22735
22736 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22737 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22738 match(Set dst (RoundVD src));
22739 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22740 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22741 ins_encode %{
22742 int vlen_enc = vector_length_encoding(this);
22743 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22744 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22745 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22746 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22747 %}
22748 ins_pipe( pipe_slow );
22749 %}
22750
22751 // --------------------------------- VectorMaskCmp --------------------------------------
22752
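// VectorMaskCmp compares two vectors lane by lane according to the BoolTest condition.
// With plain AVX the result is a vector whose lanes are all-ones where the condition
// holds; when the node type is a vector mask (TypeVectMask) the result is produced
// directly in a k register, and for 512-bit vectors without a mask type the k-register
// result is expanded back into all-ones lanes via a masked load of vector_all_bits_set().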
22753 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22754 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22755 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22756 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22757 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22758 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22759 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22760 ins_encode %{
22761 int vlen_enc = vector_length_encoding(this, $src1);
22762 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22763 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22764 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22765 } else {
22766 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22767 }
22768 %}
22769 ins_pipe( pipe_slow );
22770 %}
22771
22772 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22773 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22774 n->bottom_type()->isa_vectmask() == nullptr &&
22775 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22776 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22777 effect(TEMP ktmp);
22778 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22779 ins_encode %{
22780 int vlen_enc = Assembler::AVX_512bit;
22781 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22782 KRegister mask = k0; // The comparison itself is not being masked.
22783 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22784 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22785 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22786 } else {
22787 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22788 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22789 }
22790 %}
22791 ins_pipe( pipe_slow );
22792 %}
22793
22794 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22795 predicate(n->bottom_type()->isa_vectmask() &&
22796 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22797 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22798 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22799 ins_encode %{
22800 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22801 int vlen_enc = vector_length_encoding(this, $src1);
22802 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22803 KRegister mask = k0; // The comparison itself is not being masked.
22804 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22805 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22806 } else {
22807 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22808 }
22809 %}
22810 ins_pipe( pipe_slow );
22811 %}
22812
22813 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22814 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22815 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22816 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22817 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22818 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22819 (n->in(2)->get_int() == BoolTest::eq ||
22820 n->in(2)->get_int() == BoolTest::lt ||
22821 n->in(2)->get_int() == BoolTest::gt)); // cond
22822 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22823 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22824 ins_encode %{
22825 int vlen_enc = vector_length_encoding(this, $src1);
22826 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22827 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22828 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22829 %}
22830 ins_pipe( pipe_slow );
22831 %}
22832
22833 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22834 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22835 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22836 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22837 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22838 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22839 (n->in(2)->get_int() == BoolTest::ne ||
22840 n->in(2)->get_int() == BoolTest::le ||
22841 n->in(2)->get_int() == BoolTest::ge)); // cond
22842 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22843 effect(TEMP dst, TEMP xtmp);
22844 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22845 ins_encode %{
22846 int vlen_enc = vector_length_encoding(this, $src1);
22847 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22848 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22849 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22850 %}
22851 ins_pipe( pipe_slow );
22852 %}
22853
22854 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22855 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22856 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22857 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22858 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22859 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22860 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22861 effect(TEMP dst, TEMP xtmp);
22862 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22863 ins_encode %{
22864 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22865 int vlen_enc = vector_length_encoding(this, $src1);
22866 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22867 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22868
22869 if (vlen_enc == Assembler::AVX_128bit) {
22870 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22871 } else {
22872 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22873 }
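    // Unsigned comparison: flip the sign bit of both operands and compare signed.
    // e.g. for T_INT, 2 u< 0x80000001 holds; after XOR with 0x80000000 it becomes
    // 0x80000002 s< 0x00000001, i.e. -2147483646 < 1, which also holds.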
22874 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22875 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22876 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22877 %}
22878 ins_pipe( pipe_slow );
22879 %}
22880
22881 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22882 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22883 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22884 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22885 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22886 effect(TEMP ktmp);
22887 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22888 ins_encode %{
22889 assert(UseAVX > 2, "required");
22890
22891 int vlen_enc = vector_length_encoding(this, $src1);
22892 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22893 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22894 KRegister mask = k0; // The comparison itself is not being masked.
22895 bool merge = false;
22896 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22897
22898 switch (src1_elem_bt) {
22899 case T_INT: {
22900 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22901 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22902 break;
22903 }
22904 case T_LONG: {
22905 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22906 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22907 break;
22908 }
22909 default: assert(false, "%s", type2name(src1_elem_bt));
22910 }
22911 %}
22912 ins_pipe( pipe_slow );
22913 %}
22914
22915
22916 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22917 predicate(n->bottom_type()->isa_vectmask() &&
22918 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22919 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22920   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22921 ins_encode %{
22922 assert(UseAVX > 2, "required");
22923 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22924
22925 int vlen_enc = vector_length_encoding(this, $src1);
22926 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22927 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22928 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22929
22930     // The comparison itself is not being masked; k0 is passed as the mask register.
22931 switch (src1_elem_bt) {
22932 case T_BYTE: {
22933 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22934 break;
22935 }
22936 case T_SHORT: {
22937 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22938 break;
22939 }
22940 case T_INT: {
22941 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22942 break;
22943 }
22944 case T_LONG: {
22945 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22946 break;
22947 }
22948 default: assert(false, "%s", type2name(src1_elem_bt));
22949 }
22950 %}
22951 ins_pipe( pipe_slow );
22952 %}
22953
22954 // Extract
22955
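// Extract a single element from a vector into a scalar register. For vectors wider than
// 128 bits the element's 128-bit lane is first isolated into a temporary with get_lane(),
// then the element is read from that lane with get_elem().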
22956 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22957 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22958 match(Set dst (ExtractI src idx));
22959 match(Set dst (ExtractS src idx));
22960 match(Set dst (ExtractB src idx));
22961 format %{ "extractI $dst,$src,$idx\t!" %}
22962 ins_encode %{
22963 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22964
22965 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22966 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22967 %}
22968 ins_pipe( pipe_slow );
22969 %}
22970
22971 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22972 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22973 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22974 match(Set dst (ExtractI src idx));
22975 match(Set dst (ExtractS src idx));
22976 match(Set dst (ExtractB src idx));
22977 effect(TEMP vtmp);
22978 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22979 ins_encode %{
22980 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22981
22982 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22983 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22984 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22985 %}
22986 ins_pipe( pipe_slow );
22987 %}
22988
22989 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22990 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22991 match(Set dst (ExtractL src idx));
22992 format %{ "extractL $dst,$src,$idx\t!" %}
22993 ins_encode %{
22994 assert(UseSSE >= 4, "required");
22995 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22996
22997 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22998 %}
22999 ins_pipe( pipe_slow );
23000 %}
23001
23002 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
23003 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23004 Matcher::vector_length(n->in(1)) == 8); // src
23005 match(Set dst (ExtractL src idx));
23006 effect(TEMP vtmp);
23007 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
23008 ins_encode %{
23009 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23010
23011 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23012 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
23013 %}
23014 ins_pipe( pipe_slow );
23015 %}
23016
23017 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23018 predicate(Matcher::vector_length(n->in(1)) <= 4);
23019 match(Set dst (ExtractF src idx));
23020 effect(TEMP dst, TEMP vtmp);
23021 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23022 ins_encode %{
23023 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23024
23025 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
23026 %}
23027 ins_pipe( pipe_slow );
23028 %}
23029
23030 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23031   predicate(Matcher::vector_length(n->in(1)) == 8 || // src
23032             Matcher::vector_length(n->in(1)) == 16); // src
23033 match(Set dst (ExtractF src idx));
23034 effect(TEMP vtmp);
23035 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23036 ins_encode %{
23037 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23038
23039 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23040 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
23041 %}
23042 ins_pipe( pipe_slow );
23043 %}
23044
23045 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
23046 predicate(Matcher::vector_length(n->in(1)) == 2); // src
23047 match(Set dst (ExtractD src idx));
23048 format %{ "extractD $dst,$src,$idx\t!" %}
23049 ins_encode %{
23050 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23051
23052 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23053 %}
23054 ins_pipe( pipe_slow );
23055 %}
23056
23057 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
23058 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23059 Matcher::vector_length(n->in(1)) == 8); // src
23060 match(Set dst (ExtractD src idx));
23061 effect(TEMP vtmp);
23062 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
23063 ins_encode %{
23064 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23065
23066 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23067 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
23068 %}
23069 ins_pipe( pipe_slow );
23070 %}
23071
23072 // --------------------------------- Vector Blend --------------------------------------
23073
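// VectorBlend selects src2 lanes where the corresponding mask lane is set and src1 lanes
// where it is clear: SSE uses pblendvb with the mask pinned in xmm0, AVX uses the variable
// blend instructions, and AVX-512 blends under a k-register mask (re-materialized from the
// vector mask when necessary).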
23074 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
23075 predicate(UseAVX == 0);
23076 match(Set dst (VectorBlend (Binary dst src) mask));
23077 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
23078 effect(TEMP tmp);
23079 ins_encode %{
23080 assert(UseSSE >= 4, "required");
23081
23082 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
23083 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
23084 }
23085 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
23086 %}
23087 ins_pipe( pipe_slow );
23088 %}
23089
23090 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
23091 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23092 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23093 Matcher::vector_length_in_bytes(n) <= 32 &&
23094 is_integral_type(Matcher::vector_element_basic_type(n)));
23095 match(Set dst (VectorBlend (Binary src1 src2) mask));
23096 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23097 ins_encode %{
23098 int vlen_enc = vector_length_encoding(this);
23099 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23100 %}
23101 ins_pipe( pipe_slow );
23102 %}
23103
23104 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
23105 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23106 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23107 Matcher::vector_length_in_bytes(n) <= 32 &&
23108 !is_integral_type(Matcher::vector_element_basic_type(n)));
23109 match(Set dst (VectorBlend (Binary src1 src2) mask));
23110 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23111 ins_encode %{
23112 int vlen_enc = vector_length_encoding(this);
23113 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23114 %}
23115 ins_pipe( pipe_slow );
23116 %}
23117
23118 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
23119 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
23120 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23121 Matcher::vector_length_in_bytes(n) <= 32);
23122 match(Set dst (VectorBlend (Binary src1 src2) mask));
23123 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
23124 effect(TEMP vtmp, TEMP dst);
23125 ins_encode %{
23126 int vlen_enc = vector_length_encoding(this);
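    // Bitwise select: dst = (~mask & src1) | (mask & src2), which matches the variable
    // blend because the mask lanes are either all-ones or all-zeros.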
23127 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
23128 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23129 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23130 %}
23131 ins_pipe( pipe_slow );
23132 %}
23133
23134 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
23135 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
23136 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
23137 match(Set dst (VectorBlend (Binary src1 src2) mask));
23138   format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
23139 effect(TEMP ktmp);
23140 ins_encode %{
23141 int vlen_enc = Assembler::AVX_512bit;
23142 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23143 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
23144 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23145 %}
23146 ins_pipe( pipe_slow );
23147 %}
23148
23149
23150 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
23151 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23152 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23153 VM_Version::supports_avx512bw()));
23154 match(Set dst (VectorBlend (Binary src1 src2) mask));
23155   format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23156 ins_encode %{
23157 int vlen_enc = vector_length_encoding(this);
23158 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23159 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23160 %}
23161 ins_pipe( pipe_slow );
23162 %}
23163
23164 // --------------------------------- ABS --------------------------------------
23165 // a = |a|
23166 instruct vabsB_reg(vec dst, vec src) %{
23167 match(Set dst (AbsVB src));
23168 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23169 ins_encode %{
23170 uint vlen = Matcher::vector_length(this);
23171 if (vlen <= 16) {
23172 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23173 } else {
23174 int vlen_enc = vector_length_encoding(this);
23175 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23176 }
23177 %}
23178 ins_pipe( pipe_slow );
23179 %}
23180
23181 instruct vabsS_reg(vec dst, vec src) %{
23182 match(Set dst (AbsVS src));
23183 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23184 ins_encode %{
23185 uint vlen = Matcher::vector_length(this);
23186 if (vlen <= 8) {
23187 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23188 } else {
23189 int vlen_enc = vector_length_encoding(this);
23190 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23191 }
23192 %}
23193 ins_pipe( pipe_slow );
23194 %}
23195
23196 instruct vabsI_reg(vec dst, vec src) %{
23197 match(Set dst (AbsVI src));
23198 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23199 ins_encode %{
23200 uint vlen = Matcher::vector_length(this);
23201 if (vlen <= 4) {
23202 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23203 } else {
23204 int vlen_enc = vector_length_encoding(this);
23205 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23206 }
23207 %}
23208 ins_pipe( pipe_slow );
23209 %}
23210
23211 instruct vabsL_reg(vec dst, vec src) %{
23212 match(Set dst (AbsVL src));
23213 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23214 ins_encode %{
23215 assert(UseAVX > 2, "required");
23216 int vlen_enc = vector_length_encoding(this);
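    // evpabsq needs AVX512VL for 128/256-bit operands; without it, use the 512-bit encoding.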
23217 if (!VM_Version::supports_avx512vl()) {
23218 vlen_enc = Assembler::AVX_512bit;
23219 }
23220 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23221 %}
23222 ins_pipe( pipe_slow );
23223 %}
23224
23225 // --------------------------------- ABSNEG --------------------------------------
23226
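// Abs clears the sign bit and Neg flips it; both are implemented as one bitwise operation
// against a sign-bit mask constant (the [mask] shown in the formats below).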
23227 instruct vabsnegF(vec dst, vec src) %{
23228 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23229 match(Set dst (AbsVF src));
23230 match(Set dst (NegVF src));
23231 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23232 ins_cost(150);
23233 ins_encode %{
23234 int opcode = this->ideal_Opcode();
23235 int vlen = Matcher::vector_length(this);
23236 if (vlen == 2) {
23237 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23238 } else {
23239 assert(vlen == 8 || vlen == 16, "required");
23240 int vlen_enc = vector_length_encoding(this);
23241 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23242 }
23243 %}
23244 ins_pipe( pipe_slow );
23245 %}
23246
23247 instruct vabsneg4F(vec dst) %{
23248 predicate(Matcher::vector_length(n) == 4);
23249 match(Set dst (AbsVF dst));
23250 match(Set dst (NegVF dst));
23251 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23252 ins_cost(150);
23253 ins_encode %{
23254 int opcode = this->ideal_Opcode();
23255 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23256 %}
23257 ins_pipe( pipe_slow );
23258 %}
23259
23260 instruct vabsnegD(vec dst, vec src) %{
23261 match(Set dst (AbsVD src));
23262 match(Set dst (NegVD src));
23263 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23264 ins_encode %{
23265 int opcode = this->ideal_Opcode();
23266 uint vlen = Matcher::vector_length(this);
23267 if (vlen == 2) {
23268 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23269 } else {
23270 int vlen_enc = vector_length_encoding(this);
23271 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23272 }
23273 %}
23274 ins_pipe( pipe_slow );
23275 %}
23276
23277 //------------------------------------- VectorTest --------------------------------------------
23278
23279 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23280 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23281 match(Set cr (VectorTest src1 src2));
23282 effect(TEMP vtmp);
23283 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23284 ins_encode %{
23285 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23286 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23287 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23288 %}
23289 ins_pipe( pipe_slow );
23290 %}
23291
23292 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23293 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23294 match(Set cr (VectorTest src1 src2));
23295 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23296 ins_encode %{
23297 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23298 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23299 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23300 %}
23301 ins_pipe( pipe_slow );
23302 %}
23303
23304 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23305 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23306 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23307 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23308 match(Set cr (VectorTest src1 src2));
23309 effect(TEMP tmp);
23310 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23311 ins_encode %{
23312 uint masklen = Matcher::vector_length(this, $src1);
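    // All lanes are true iff the low masklen bits of the mask are all set; compare them
    // against (1 << masklen) - 1 and let the flags carry the result.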
23313 __ kmovwl($tmp$$Register, $src1$$KRegister);
23314 __ andl($tmp$$Register, (1 << masklen) - 1);
23315 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23316 %}
23317 ins_pipe( pipe_slow );
23318 %}
23319
23320 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23321 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23322 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23323 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23324 match(Set cr (VectorTest src1 src2));
23325 effect(TEMP tmp);
23326 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23327 ins_encode %{
23328 uint masklen = Matcher::vector_length(this, $src1);
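    // Some lane is true iff (mask & ((1 << masklen) - 1)) is non-zero; andl sets ZF accordingly.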
23329 __ kmovwl($tmp$$Register, $src1$$KRegister);
23330 __ andl($tmp$$Register, (1 << masklen) - 1);
23331 %}
23332 ins_pipe( pipe_slow );
23333 %}
23334
23335 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23336 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23337 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23338 match(Set cr (VectorTest src1 src2));
23339 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23340 ins_encode %{
23341 uint masklen = Matcher::vector_length(this, $src1);
23342 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23343 %}
23344 ins_pipe( pipe_slow );
23345 %}
23346
23347 //------------------------------------- LoadMask --------------------------------------------
23348
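// VectorLoadMask widens a byte vector of 0/1 values into a proper vector mask: either
// lanes of all-zeros/all-ones or, when the node type is TypeVectMask, a k-register mask.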
23349 instruct loadMask(legVec dst, legVec src) %{
23350 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23351 match(Set dst (VectorLoadMask src));
23352 effect(TEMP dst);
23353 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23354 ins_encode %{
23355 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23356 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23357 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23358 %}
23359 ins_pipe( pipe_slow );
23360 %}
23361
23362 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23363 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23364 match(Set dst (VectorLoadMask src));
23365 effect(TEMP xtmp);
23366 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23367 ins_encode %{
23368 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23369 true, Assembler::AVX_512bit);
23370 %}
23371 ins_pipe( pipe_slow );
23372 %}
23373
23374 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23375 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23376 match(Set dst (VectorLoadMask src));
23377 effect(TEMP xtmp);
23378 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23379 ins_encode %{
23380 int vlen_enc = vector_length_encoding(in(1));
23381 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23382 false, vlen_enc);
23383 %}
23384 ins_pipe( pipe_slow );
23385 %}
23386
23387 //------------------------------------- StoreMask --------------------------------------------
23388
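// VectorStoreMask is the inverse of VectorLoadMask: the all-zeros/all-ones lanes of $size
// bytes are narrowed to a byte vector of 0/1 values (pabs* turns -1 into 1, the pack/evpmov
// steps narrow the lanes to bytes).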
23389 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23390 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23391 match(Set dst (VectorStoreMask src size));
23392 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23393 ins_encode %{
23394 int vlen = Matcher::vector_length(this);
23395 if (vlen <= 16 && UseAVX <= 2) {
23396 assert(UseSSE >= 3, "required");
23397 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23398 } else {
23399 assert(UseAVX > 0, "required");
23400 int src_vlen_enc = vector_length_encoding(this, $src);
23401 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23402 }
23403 %}
23404 ins_pipe( pipe_slow );
23405 %}
23406
23407 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23408 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23409 match(Set dst (VectorStoreMask src size));
23410 effect(TEMP_DEF dst, TEMP xtmp);
23411 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23412 ins_encode %{
23413 int vlen_enc = Assembler::AVX_128bit;
23414 int vlen = Matcher::vector_length(this);
23415 if (vlen <= 8) {
23416 assert(UseSSE >= 3, "required");
23417 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23418 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23419 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23420 } else {
23421 assert(UseAVX > 0, "required");
23422 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23423 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23424 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23425 }
23426 %}
23427 ins_pipe( pipe_slow );
23428 %}
23429
23430 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23431 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23432 match(Set dst (VectorStoreMask src size));
23433 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23434 effect(TEMP_DEF dst, TEMP xtmp);
23435 ins_encode %{
23436 int vlen_enc = Assembler::AVX_128bit;
23437 int vlen = Matcher::vector_length(this);
23438 if (vlen <= 4) {
23439 assert(UseSSE >= 3, "required");
23440 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23441 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23442 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23443 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23444 } else {
23445 assert(UseAVX > 0, "required");
23446 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23447 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23448 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23449 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23450 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23451 }
23452 %}
23453 ins_pipe( pipe_slow );
23454 %}
23455
23456 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23457 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23458 match(Set dst (VectorStoreMask src size));
23459 effect(TEMP_DEF dst, TEMP xtmp);
23460 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23461 ins_encode %{
23462 assert(UseSSE >= 3, "required");
23463 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23464 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23465 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23466 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23467 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23468 %}
23469 ins_pipe( pipe_slow );
23470 %}
23471
23472 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23473 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23474 match(Set dst (VectorStoreMask src size));
23475 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23476 effect(TEMP_DEF dst, TEMP vtmp);
23477 ins_encode %{
23478 int vlen_enc = Assembler::AVX_128bit;
23479 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23480 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23481 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23482 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23483 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23484 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23485 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23486 %}
23487 ins_pipe( pipe_slow );
23488 %}
23489
23490 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23491 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23492 match(Set dst (VectorStoreMask src size));
23493 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23494 ins_encode %{
23495 int src_vlen_enc = vector_length_encoding(this, $src);
23496 int dst_vlen_enc = vector_length_encoding(this);
23497 if (!VM_Version::supports_avx512vl()) {
23498 src_vlen_enc = Assembler::AVX_512bit;
23499 }
23500 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23501 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23502 %}
23503 ins_pipe( pipe_slow );
23504 %}
23505
23506 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23507 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23508 match(Set dst (VectorStoreMask src size));
23509 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23510 ins_encode %{
23511 int src_vlen_enc = vector_length_encoding(this, $src);
23512 int dst_vlen_enc = vector_length_encoding(this);
23513 if (!VM_Version::supports_avx512vl()) {
23514 src_vlen_enc = Assembler::AVX_512bit;
23515 }
23516 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23517 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23518 %}
23519 ins_pipe( pipe_slow );
23520 %}
23521
23522 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23523 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23524 match(Set dst (VectorStoreMask mask size));
23525 effect(TEMP_DEF dst);
23526 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23527 ins_encode %{
23528 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23529 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23530 false, Assembler::AVX_512bit, noreg);
23531 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23532 %}
23533 ins_pipe( pipe_slow );
23534 %}
23535
23536 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23537 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23538 match(Set dst (VectorStoreMask mask size));
23539 effect(TEMP_DEF dst);
23540 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23541 ins_encode %{
23542 int dst_vlen_enc = vector_length_encoding(this);
23543 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23544 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23545 %}
23546 ins_pipe( pipe_slow );
23547 %}
23548
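// Casting a k-register mask, or a vector mask whose total size does not change, is a
// register-level no-op; only size-changing vector-mask casts (vmaskcast_avx) emit code.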
23549 instruct vmaskcast_evex(kReg dst) %{
23550 match(Set dst (VectorMaskCast dst));
23551 ins_cost(0);
23552 format %{ "vector_mask_cast $dst" %}
23553 ins_encode %{
23554 // empty
23555 %}
23556 ins_pipe(empty);
23557 %}
23558
23559 instruct vmaskcast(vec dst) %{
23560 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23561 match(Set dst (VectorMaskCast dst));
23562 ins_cost(0);
23563 format %{ "vector_mask_cast $dst" %}
23564 ins_encode %{
23565 // empty
23566 %}
23567 ins_pipe(empty);
23568 %}
23569
23570 instruct vmaskcast_avx(vec dst, vec src) %{
23571 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23572 match(Set dst (VectorMaskCast src));
23573 format %{ "vector_mask_cast $dst, $src" %}
23574 ins_encode %{
23575 int vlen = Matcher::vector_length(this);
23576 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23577 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23578 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23579 %}
23580 ins_pipe(pipe_slow);
23581 %}
23582
23583 //-------------------------------- Load Iota Indices ----------------------------------
23584
23585 instruct loadIotaIndices(vec dst, immI_0 src) %{
23586 match(Set dst (VectorLoadConst src));
23587 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23588 ins_encode %{
23589 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23590 BasicType bt = Matcher::vector_element_basic_type(this);
23591 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23592 %}
23593 ins_pipe( pipe_slow );
23594 %}
23595
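// PopulateIndex computes dst[i] = src1 + i * step; the immI_1 operand restricts the step
// to 1, so the iota constant is simply added to the broadcast starting value.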
23596 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23597 match(Set dst (PopulateIndex src1 src2));
23598 effect(TEMP dst, TEMP vtmp);
23599 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23600 ins_encode %{
23601 assert($src2$$constant == 1, "required");
23602 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23603 int vlen_enc = vector_length_encoding(this);
23604 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23605 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23606 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23607 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23608 %}
23609 ins_pipe( pipe_slow );
23610 %}
23611
23612 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23613 match(Set dst (PopulateIndex src1 src2));
23614 effect(TEMP dst, TEMP vtmp);
23615 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23616 ins_encode %{
23617 assert($src2$$constant == 1, "required");
23618 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23619 int vlen_enc = vector_length_encoding(this);
23620 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23621 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23622 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23623 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23624 %}
23625 ins_pipe( pipe_slow );
23626 %}
23627
23628 //-------------------------------- Rearrange ----------------------------------
23629
23630 // LoadShuffle/Rearrange for Byte
23631 instruct rearrangeB(vec dst, vec shuffle) %{
23632 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23633 Matcher::vector_length(n) < 32);
23634 match(Set dst (VectorRearrange dst shuffle));
23635 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23636 ins_encode %{
23637 assert(UseSSE >= 4, "required");
23638 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23639 %}
23640 ins_pipe( pipe_slow );
23641 %}
23642
23643 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23644 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23645 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23646 match(Set dst (VectorRearrange src shuffle));
23647 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23648 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23649 ins_encode %{
23650 assert(UseAVX >= 2, "required");
23651 // Swap src into vtmp1
23652 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23653     // Shuffle the swapped src to get entries from the other 128-bit lane
23654 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23655     // Shuffle the original src to get entries from its own 128-bit lane
23656 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23657     // Create a blend mask by setting the high bits for entries coming from the other lane in the shuffle
23658 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23659 // Perform the blend
23660 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23661 %}
23662 ins_pipe( pipe_slow );
23663 %}
23664
23665
23666 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23667 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23668 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23669 match(Set dst (VectorRearrange src shuffle));
23670 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23671 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23672 ins_encode %{
23673 int vlen_enc = vector_length_encoding(this);
23674 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23675 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23676 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23677 %}
23678 ins_pipe( pipe_slow );
23679 %}
23680
23681 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23682 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23683 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23684 match(Set dst (VectorRearrange src shuffle));
23685 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23686 ins_encode %{
23687 int vlen_enc = vector_length_encoding(this);
23688 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23689 %}
23690 ins_pipe( pipe_slow );
23691 %}
23692
23693 // LoadShuffle/Rearrange for Short
23694
23695 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23696 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23697 !VM_Version::supports_avx512bw());
23698 match(Set dst (VectorLoadShuffle src));
23699 effect(TEMP dst, TEMP vtmp);
23700 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23701 ins_encode %{
23702     // Create a byte shuffle mask from the short shuffle mask;
23703     // only a byte shuffle instruction is available on these platforms
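    // e.g. a short lane index of 3 becomes the byte-index pair {6, 7}:
    // 3 * 2 = 6 for the low byte and 6 + 1 = 7 for the high byte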
23704 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23705 if (UseAVX == 0) {
23706 assert(vlen_in_bytes <= 16, "required");
23707 // Multiply each shuffle by two to get byte index
23708 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23709 __ psllw($vtmp$$XMMRegister, 1);
23710
23711 // Duplicate to create 2 copies of byte index
23712 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23713 __ psllw($dst$$XMMRegister, 8);
23714 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23715
23716 // Add one to get alternate byte index
23717 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23718 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23719 } else {
23720 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23721 int vlen_enc = vector_length_encoding(this);
23722 // Multiply each shuffle by two to get byte index
23723 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23724
23725 // Duplicate to create 2 copies of byte index
23726 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23727 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23728
23729 // Add one to get alternate byte index
23730 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23731 }
23732 %}
23733 ins_pipe( pipe_slow );
23734 %}
23735
23736 instruct rearrangeS(vec dst, vec shuffle) %{
23737 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23738 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23739 match(Set dst (VectorRearrange dst shuffle));
23740 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23741 ins_encode %{
23742 assert(UseSSE >= 4, "required");
23743 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23744 %}
23745 ins_pipe( pipe_slow );
23746 %}
23747
23748 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23749 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23750 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23751 match(Set dst (VectorRearrange src shuffle));
23752 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23753 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23754 ins_encode %{
23755 assert(UseAVX >= 2, "required");
23756 // Swap src into vtmp1
23757 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23758     // Shuffle the swapped src to get entries from the other 128-bit lane
23759 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23760     // Shuffle the original src to get entries from its own 128-bit lane
23761 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23762     // Create a blend mask by setting the high bits for entries coming from the other lane in the shuffle
23763 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23764 // Perform the blend
23765 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23766 %}
23767 ins_pipe( pipe_slow );
23768 %}
23769
23770 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23771 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23772 VM_Version::supports_avx512bw());
23773 match(Set dst (VectorRearrange src shuffle));
23774 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23775 ins_encode %{
23776 int vlen_enc = vector_length_encoding(this);
23777 if (!VM_Version::supports_avx512vl()) {
23778 vlen_enc = Assembler::AVX_512bit;
23779 }
23780 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23781 %}
23782 ins_pipe( pipe_slow );
23783 %}
23784
23785 // LoadShuffle/Rearrange for Integer and Float
23786
23787 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23788 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23789 Matcher::vector_length(n) == 4 && UseAVX == 0);
23790 match(Set dst (VectorLoadShuffle src));
23791 effect(TEMP dst, TEMP vtmp);
23792 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23793 ins_encode %{
23794 assert(UseSSE >= 4, "required");
23795
23796     // Create a byte shuffle mask from the int shuffle mask;
23797     // only a byte shuffle instruction is available on these platforms
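    // e.g. an int lane index of 2 becomes the byte-index quad {8, 9, 10, 11}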
23798
23799 // Duplicate and multiply each shuffle by 4
23800 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23801 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23802 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23803 __ psllw($vtmp$$XMMRegister, 2);
23804
23805 // Duplicate again to create 4 copies of byte index
23806 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23807 __ psllw($dst$$XMMRegister, 8);
23808 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23809
23810 // Add 3,2,1,0 to get alternate byte index
23811 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23812 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23813 %}
23814 ins_pipe( pipe_slow );
23815 %}
23816
23817 instruct rearrangeI(vec dst, vec shuffle) %{
23818 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23819 UseAVX == 0);
23820 match(Set dst (VectorRearrange dst shuffle));
23821 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23822 ins_encode %{
23823 assert(UseSSE >= 4, "required");
23824 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23825 %}
23826 ins_pipe( pipe_slow );
23827 %}
23828
23829 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23830 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23831 UseAVX > 0);
23832 match(Set dst (VectorRearrange src shuffle));
23833 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23834 ins_encode %{
23835 int vlen_enc = vector_length_encoding(this);
23836 BasicType bt = Matcher::vector_element_basic_type(this);
23837 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23838 %}
23839 ins_pipe( pipe_slow );
23840 %}
23841
23842 // LoadShuffle/Rearrange for Long and Double
23843
23844 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23845 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23846 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23847 match(Set dst (VectorLoadShuffle src));
23848 effect(TEMP dst, TEMP vtmp);
23849 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23850 ins_encode %{
23851 assert(UseAVX >= 2, "required");
23852
23853 int vlen_enc = vector_length_encoding(this);
23854     // Create a double word shuffle mask from the long shuffle mask;
23855     // only a double word shuffle instruction is available on these platforms
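    // e.g. a long lane index of 3 becomes the double word index pair {6, 7}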
23856
23857 // Multiply each shuffle by two to get double word index
23858 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23859
23860 // Duplicate each double word shuffle
23861 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23862 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23863
23864 // Add one to get alternate double word index
23865 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23866 %}
23867 ins_pipe( pipe_slow );
23868 %}
23869
23870 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23871 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23872 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23873 match(Set dst (VectorRearrange src shuffle));
23874 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23875 ins_encode %{
23876 assert(UseAVX >= 2, "required");
23877
23878 int vlen_enc = vector_length_encoding(this);
23879 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23880 %}
23881 ins_pipe( pipe_slow );
23882 %}
23883
23884 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23885 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23886 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23887 match(Set dst (VectorRearrange src shuffle));
23888 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23889 ins_encode %{
23890 assert(UseAVX > 2, "required");
23891
23892 int vlen_enc = vector_length_encoding(this);
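// There is no 128-bit form of vpermq with a vector index, so promote the
// encoding to 256 bits; the extra upper lanes are don't-cares here.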
23893 if (vlen_enc == Assembler::AVX_128bit) {
23894 vlen_enc = Assembler::AVX_256bit;
23895 }
23896 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23897 %}
23898 ins_pipe( pipe_slow );
23899 %}
23900
23901 // --------------------------------- FMA --------------------------------------
23902 // a * b + c
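// The accumulator $c doubles as the destination, so the result is written back into $c in place.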
23903
23904 instruct vfmaF_reg(vec a, vec b, vec c) %{
23905 match(Set c (FmaVF c (Binary a b)));
23906 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23907 ins_cost(150);
23908 ins_encode %{
23909 assert(UseFMA, "not enabled");
23910 int vlen_enc = vector_length_encoding(this);
23911 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23912 %}
23913 ins_pipe( pipe_slow );
23914 %}
23915
23916 instruct vfmaF_mem(vec a, memory b, vec c) %{
23917 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23918 match(Set c (FmaVF c (Binary a (LoadVector b))));
23919 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23920 ins_cost(150);
23921 ins_encode %{
23922 assert(UseFMA, "not enabled");
23923 int vlen_enc = vector_length_encoding(this);
23924 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23925 %}
23926 ins_pipe( pipe_slow );
23927 %}
23928
23929 instruct vfmaD_reg(vec a, vec b, vec c) %{
23930 match(Set c (FmaVD c (Binary a b)));
23931 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23932 ins_cost(150);
23933 ins_encode %{
23934 assert(UseFMA, "not enabled");
23935 int vlen_enc = vector_length_encoding(this);
23936 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23937 %}
23938 ins_pipe( pipe_slow );
23939 %}
23940
23941 instruct vfmaD_mem(vec a, memory b, vec c) %{
23942 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23943 match(Set c (FmaVD c (Binary a (LoadVector b))));
23944 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23945 ins_cost(150);
23946 ins_encode %{
23947 assert(UseFMA, "not enabled");
23948 int vlen_enc = vector_length_encoding(this);
23949 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23950 %}
23951 ins_pipe( pipe_slow );
23952 %}
23953
23954 // --------------------------------- Vector Multiply Add --------------------------------------
23955
23956 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23957 predicate(UseAVX == 0);
23958 match(Set dst (MulAddVS2VI dst src1));
23959 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23960 ins_encode %{
23961 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23962 %}
23963 ins_pipe( pipe_slow );
23964 %}
23965
23966 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23967 predicate(UseAVX > 0);
23968 match(Set dst (MulAddVS2VI src1 src2));
23969 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23970 ins_encode %{
23971 int vlen_enc = vector_length_encoding(this);
23972 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23973 %}
23974 ins_pipe( pipe_slow );
23975 %}
23976
23977 // --------------------------------- Vector Multiply Add Add ----------------------------------
23978
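// With AVX512_VNNI, vpdpwssd fuses the vpmaddwd + vpaddd pair into a single instruction.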
23979 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23980 predicate(VM_Version::supports_avx512_vnni());
23981 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23982 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23983 ins_encode %{
23984 assert(UseAVX > 2, "required");
23985 int vlen_enc = vector_length_encoding(this);
23986 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23987 %}
23988 ins_pipe( pipe_slow );
23989 ins_cost(10);
23990 %}
23991
23992 // --------------------------------- PopCount --------------------------------------
23993
23994 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23995 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23996 match(Set dst (PopCountVI src));
23997 match(Set dst (PopCountVL src));
23998 format %{ "vector_popcount_integral $dst, $src" %}
23999 ins_encode %{
24000 int opcode = this->ideal_Opcode();
24001 int vlen_enc = vector_length_encoding(this, $src);
24002 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24003 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
24004 %}
24005 ins_pipe( pipe_slow );
24006 %}
24007
24008 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
24009 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24010 match(Set dst (PopCountVI src mask));
24011 match(Set dst (PopCountVL src mask));
24012 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
24013 ins_encode %{
24014 int vlen_enc = vector_length_encoding(this, $src);
24015 BasicType bt = Matcher::vector_element_basic_type(this, $src);
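// Copy $src first so that lanes not set in $mask keep their original element value (merge masking).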
24016 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24017 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
24018 %}
24019 ins_pipe( pipe_slow );
24020 %}
24021
24022 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
24023 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24024 match(Set dst (PopCountVI src));
24025 match(Set dst (PopCountVL src));
24026 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24027 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
24028 ins_encode %{
24029 int opcode = this->ideal_Opcode();
24030 int vlen_enc = vector_length_encoding(this, $src);
24031 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24032 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24033 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
24034 %}
24035 ins_pipe( pipe_slow );
24036 %}
24037
24038 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24039
24040 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
24041 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24042 Matcher::vector_length_in_bytes(n->in(1))));
24043 match(Set dst (CountTrailingZerosV src));
24044 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
24045 ins_cost(400);
24046 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
24047 ins_encode %{
24048 int vlen_enc = vector_length_encoding(this, $src);
24049 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24050 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24051 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24052 %}
24053 ins_pipe( pipe_slow );
24054 %}
24055
24056 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24057 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24058 VM_Version::supports_avx512cd() &&
24059 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24060 match(Set dst (CountTrailingZerosV src));
24061 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24062 ins_cost(400);
24063 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24064 ins_encode %{
24065 int vlen_enc = vector_length_encoding(this, $src);
24066 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24067 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24068 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24069 %}
24070 ins_pipe( pipe_slow );
24071 %}
24072
24073 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
24074 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24075 match(Set dst (CountTrailingZerosV src));
24076 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
24077 ins_cost(400);
24078 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
24079 ins_encode %{
24080 int vlen_enc = vector_length_encoding(this, $src);
24081 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24082 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24083 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
24084 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
24085 %}
24086 ins_pipe( pipe_slow );
24087 %}
24088
24089 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24090 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24091 match(Set dst (CountTrailingZerosV src));
24092 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24093 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24094 ins_encode %{
24095 int vlen_enc = vector_length_encoding(this, $src);
24096 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24097 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24098 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24099 %}
24100 ins_pipe( pipe_slow );
24101 %}
24102
24103
24104 // --------------------------------- Bitwise Ternary Logic ----------------------------------
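// vpternlog{d,q} evaluates an arbitrary three-input boolean function of its operands;
// the 8-bit immediate $func is the truth table that selects the function.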
24105
24106 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
24107 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
24108 effect(TEMP dst);
24109 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24110 ins_encode %{
24111 int vector_len = vector_length_encoding(this);
24112 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
24113 %}
24114 ins_pipe( pipe_slow );
24115 %}
24116
24117 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
24118 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
24119 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
24120 effect(TEMP dst);
24121 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24122 ins_encode %{
24123 int vector_len = vector_length_encoding(this);
24124 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
24125 %}
24126 ins_pipe( pipe_slow );
24127 %}
24128
24129 // --------------------------------- Rotation Operations ----------------------------------
24130 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
24131 match(Set dst (RotateLeftV src shift));
24132 match(Set dst (RotateRightV src shift));
24133 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
24134 ins_encode %{
24135 int opcode = this->ideal_Opcode();
24136 int vector_len = vector_length_encoding(this);
24137 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24138 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
24139 %}
24140 ins_pipe( pipe_slow );
24141 %}
24142
24143 instruct vprorate(vec dst, vec src, vec shift) %{
24144 match(Set dst (RotateLeftV src shift));
24145 match(Set dst (RotateRightV src shift));
24146 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
24147 ins_encode %{
24148 int opcode = this->ideal_Opcode();
24149 int vector_len = vector_length_encoding(this);
24150 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24151 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
24152 %}
24153 ins_pipe( pipe_slow );
24154 %}
24155
24156 // ---------------------------------- Masked Operations ------------------------------------
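// The *_avx_non_subword forms take the mask in a vector register, while the *_evex forms use an AVX-512 opmask (k) register.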
24157 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24158 predicate(!n->in(3)->bottom_type()->isa_vectmask());
24159 match(Set dst (LoadVectorMasked mem mask));
24160 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24161 ins_encode %{
24162 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24163 int vlen_enc = vector_length_encoding(this);
24164 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24165 %}
24166 ins_pipe( pipe_slow );
24167 %}
24168
24169
24170 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24171 predicate(n->in(3)->bottom_type()->isa_vectmask());
24172 match(Set dst (LoadVectorMasked mem mask));
24173 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24174 ins_encode %{
24175 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24176 int vector_len = vector_length_encoding(this);
24177 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24178 %}
24179 ins_pipe( pipe_slow );
24180 %}
24181
24182 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24183 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24184 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24185 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24186 ins_encode %{
24187 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24188 int vlen_enc = vector_length_encoding(src_node);
24189 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24190 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24191 %}
24192 ins_pipe( pipe_slow );
24193 %}
24194
24195 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24196 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24197 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24198 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24199 ins_encode %{
24200 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24201 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24202 int vlen_enc = vector_length_encoding(src_node);
24203 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24204 %}
24205 ins_pipe( pipe_slow );
24206 %}
24207
24208 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24209 match(Set addr (VerifyVectorAlignment addr mask));
24210 effect(KILL cr);
24211 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24212 ins_encode %{
24213 Label Lskip;
24214 // check if masked bits of addr are zero
24215 __ testq($addr$$Register, $mask$$constant);
24216 __ jccb(Assembler::equal, Lskip);
24217 __ stop("verify_vector_alignment found a misaligned vector memory access");
24218 __ bind(Lskip);
24219 %}
24220 ins_pipe(pipe_slow);
24221 %}
24222
24223 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24224 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24225 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24226 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24227 ins_encode %{
24228 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24229 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24230
24231 Label DONE;
24232 int vlen_enc = vector_length_encoding(this, $src1);
24233 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24234
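// ktmp1 = lanes inside $mask where src1 == src2, ktmp2 = ~$mask.
// kortest sets CF when ktmp1 | ktmp2 is all ones, i.e. every active lane compared equal,
// so $dst keeps -1; otherwise the first clear bit of ktmp1 gives the index of the
// first lane that failed the masked comparison.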
24235 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24236 __ mov64($dst$$Register, -1L);
24237 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24238 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24239 __ jccb(Assembler::carrySet, DONE);
24240 __ kmovql($dst$$Register, $ktmp1$$KRegister);
24241 __ notq($dst$$Register);
24242 __ tzcntq($dst$$Register, $dst$$Register);
24243 __ bind(DONE);
24244 %}
24245 ins_pipe( pipe_slow );
24246 %}
24247
24248
24249 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24250 match(Set dst (VectorMaskGen len));
24251 effect(TEMP temp, KILL cr);
24252 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24253 ins_encode %{
24254 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24255 %}
24256 ins_pipe( pipe_slow );
24257 %}
24258
24259 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24260 match(Set dst (VectorMaskGen len));
24261 format %{ "vector_mask_gen $len \t! vector mask generator" %}
24262 effect(TEMP temp);
24263 ins_encode %{
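// Materialize a mask register with the low $len bits set.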
24264 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
24265 __ kmovql($dst$$KRegister, $temp$$Register);
24266 %}
24267 ins_pipe( pipe_slow );
24268 %}
24269
24270 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24271 predicate(n->in(1)->bottom_type()->isa_vectmask());
24272 match(Set dst (VectorMaskToLong mask));
24273 effect(TEMP dst, KILL cr);
24274 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24275 ins_encode %{
24276 int opcode = this->ideal_Opcode();
24277 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24278 int mask_len = Matcher::vector_length(this, $mask);
24279 int mask_size = mask_len * type2aelembytes(mbt);
24280 int vlen_enc = vector_length_encoding(this, $mask);
24281 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24282 $dst$$Register, mask_len, mask_size, vlen_enc);
24283 %}
24284 ins_pipe( pipe_slow );
24285 %}
24286
24287 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24288 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24289 match(Set dst (VectorMaskToLong mask));
24290 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24291 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24292 ins_encode %{
24293 int opcode = this->ideal_Opcode();
24294 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24295 int mask_len = Matcher::vector_length(this, $mask);
24296 int vlen_enc = vector_length_encoding(this, $mask);
24297 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24298 $dst$$Register, mask_len, mbt, vlen_enc);
24299 %}
24300 ins_pipe( pipe_slow );
24301 %}
24302
24303 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24304 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24305 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24306 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24307 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24308 ins_encode %{
24309 int opcode = this->ideal_Opcode();
24310 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24311 int mask_len = Matcher::vector_length(this, $mask);
24312 int vlen_enc = vector_length_encoding(this, $mask);
24313 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24314 $dst$$Register, mask_len, mbt, vlen_enc);
24315 %}
24316 ins_pipe( pipe_slow );
24317 %}
24318
24319 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24320 predicate(n->in(1)->bottom_type()->isa_vectmask());
24321 match(Set dst (VectorMaskTrueCount mask));
24322 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24323 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24324 ins_encode %{
24325 int opcode = this->ideal_Opcode();
24326 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24327 int mask_len = Matcher::vector_length(this, $mask);
24328 int mask_size = mask_len * type2aelembytes(mbt);
24329 int vlen_enc = vector_length_encoding(this, $mask);
24330 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24331 $tmp$$Register, mask_len, mask_size, vlen_enc);
24332 %}
24333 ins_pipe( pipe_slow );
24334 %}
24335
24336 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24337 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24338 match(Set dst (VectorMaskTrueCount mask));
24339 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24340 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24341 ins_encode %{
24342 int opcode = this->ideal_Opcode();
24343 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24344 int mask_len = Matcher::vector_length(this, $mask);
24345 int vlen_enc = vector_length_encoding(this, $mask);
24346 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24347 $tmp$$Register, mask_len, mbt, vlen_enc);
24348 %}
24349 ins_pipe( pipe_slow );
24350 %}
24351
24352 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24353 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24354 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24355 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24356 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24357 ins_encode %{
24358 int opcode = this->ideal_Opcode();
24359 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24360 int mask_len = Matcher::vector_length(this, $mask);
24361 int vlen_enc = vector_length_encoding(this, $mask);
24362 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24363 $tmp$$Register, mask_len, mbt, vlen_enc);
24364 %}
24365 ins_pipe( pipe_slow );
24366 %}
24367
24368 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24369 predicate(n->in(1)->bottom_type()->isa_vectmask());
24370 match(Set dst (VectorMaskFirstTrue mask));
24371 match(Set dst (VectorMaskLastTrue mask));
24372 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24373 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24374 ins_encode %{
24375 int opcode = this->ideal_Opcode();
24376 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24377 int mask_len = Matcher::vector_length(this, $mask);
24378 int mask_size = mask_len * type2aelembytes(mbt);
24379 int vlen_enc = vector_length_encoding(this, $mask);
24380 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24381 $tmp$$Register, mask_len, mask_size, vlen_enc);
24382 %}
24383 ins_pipe( pipe_slow );
24384 %}
24385
24386 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24387 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24388 match(Set dst (VectorMaskFirstTrue mask));
24389 match(Set dst (VectorMaskLastTrue mask));
24390 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24391 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24392 ins_encode %{
24393 int opcode = this->ideal_Opcode();
24394 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24395 int mask_len = Matcher::vector_length(this, $mask);
24396 int vlen_enc = vector_length_encoding(this, $mask);
24397 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24398 $tmp$$Register, mask_len, mbt, vlen_enc);
24399 %}
24400 ins_pipe( pipe_slow );
24401 %}
24402
24403 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24404 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24405 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24406 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24407 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24408 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24409 ins_encode %{
24410 int opcode = this->ideal_Opcode();
24411 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24412 int mask_len = Matcher::vector_length(this, $mask);
24413 int vlen_enc = vector_length_encoding(this, $mask);
24414 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24415 $tmp$$Register, mask_len, mbt, vlen_enc);
24416 %}
24417 ins_pipe( pipe_slow );
24418 %}
24419
24420 // --------------------------------- Compress/Expand Operations ---------------------------
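// AVX2 has no native compress/expand instructions, so vcompress_reg_avx emulates them
// with a table-driven permutation (hence the extra temporaries); the EVEX form below maps
// directly to the vpcompress/vpexpand family.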
24421 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24422 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24423 match(Set dst (CompressV src mask));
24424 match(Set dst (ExpandV src mask));
24425 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24426 format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24427 ins_encode %{
24428 int opcode = this->ideal_Opcode();
24429 int vlen_enc = vector_length_encoding(this);
24430 BasicType bt = Matcher::vector_element_basic_type(this);
24431 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24432 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24433 %}
24434 ins_pipe( pipe_slow );
24435 %}
24436
24437 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24438 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24439 match(Set dst (CompressV src mask));
24440 match(Set dst (ExpandV src mask));
24441 format %{ "vector_compress_expand $dst, $src, $mask" %}
24442 ins_encode %{
24443 int opcode = this->ideal_Opcode();
24444 int vector_len = vector_length_encoding(this);
24445 BasicType bt = Matcher::vector_element_basic_type(this);
24446 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24447 %}
24448 ins_pipe( pipe_slow );
24449 %}
24450
24451 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24452 match(Set dst (CompressM mask));
24453 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24454 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24455 ins_encode %{
24456 assert(this->in(1)->bottom_type()->isa_vectmask(), "expected a vector mask");
24457 int mask_len = Matcher::vector_length(this);
24458 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24459 %}
24460 ins_pipe( pipe_slow );
24461 %}
24462
24463 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24464
24465 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24466 predicate(!VM_Version::supports_gfni());
24467 match(Set dst (ReverseV src));
24468 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24469 format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24470 ins_encode %{
24471 int vec_enc = vector_length_encoding(this);
24472 BasicType bt = Matcher::vector_element_basic_type(this);
24473 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24474 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24475 %}
24476 ins_pipe( pipe_slow );
24477 %}
24478
24479 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24480 predicate(VM_Version::supports_gfni());
24481 match(Set dst (ReverseV src));
24482 effect(TEMP dst, TEMP xtmp);
24483 format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24484 ins_encode %{
24485 int vec_enc = vector_length_encoding(this);
24486 BasicType bt = Matcher::vector_element_basic_type(this);
24487 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
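// 0x8040201008040201 encodes the 8x8 bit matrix that gf2p8affineqb applies per byte to reverse its bit order.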
24488 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24489 $xtmp$$XMMRegister);
24490 %}
24491 ins_pipe( pipe_slow );
24492 %}
24493
24494 instruct vreverse_byte_reg(vec dst, vec src) %{
24495 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24496 match(Set dst (ReverseBytesV src));
24497 effect(TEMP dst);
24498 format %{ "vector_reverse_byte $dst, $src" %}
24499 ins_encode %{
24500 int vec_enc = vector_length_encoding(this);
24501 BasicType bt = Matcher::vector_element_basic_type(this);
24502 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24503 %}
24504 ins_pipe( pipe_slow );
24505 %}
24506
24507 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24508 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24509 match(Set dst (ReverseBytesV src));
24510 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24511 format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24512 ins_encode %{
24513 int vec_enc = vector_length_encoding(this);
24514 BasicType bt = Matcher::vector_element_basic_type(this);
24515 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24516 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24517 %}
24518 ins_pipe( pipe_slow );
24519 %}
24520
24521 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24522
24523 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24524 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24525 Matcher::vector_length_in_bytes(n->in(1))));
24526 match(Set dst (CountLeadingZerosV src));
24527 format %{ "vector_count_leading_zeros $dst, $src" %}
24528 ins_encode %{
24529 int vlen_enc = vector_length_encoding(this, $src);
24530 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24531 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24532 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24533 %}
24534 ins_pipe( pipe_slow );
24535 %}
24536
24537 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24538 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24539 Matcher::vector_length_in_bytes(n->in(1))));
24540 match(Set dst (CountLeadingZerosV src mask));
24541 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24542 ins_encode %{
24543 int vlen_enc = vector_length_encoding(this, $src);
24544 BasicType bt = Matcher::vector_element_basic_type(this, $src);
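// Copy $src first so that lanes not set in $mask keep their original element value (merge masking).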
24545 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24546 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24547 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24548 %}
24549 ins_pipe( pipe_slow );
24550 %}
24551
24552 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24553 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24554 VM_Version::supports_avx512cd() &&
24555 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24556 match(Set dst (CountLeadingZerosV src));
24557 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24558 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24559 ins_encode %{
24560 int vlen_enc = vector_length_encoding(this, $src);
24561 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24562 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24563 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24564 %}
24565 ins_pipe( pipe_slow );
24566 %}
24567
24568 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24569 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24570 match(Set dst (CountLeadingZerosV src));
24571 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24572 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24573 ins_encode %{
24574 int vlen_enc = vector_length_encoding(this, $src);
24575 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24576 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24577 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24578 $rtmp$$Register, true, vlen_enc);
24579 %}
24580 ins_pipe( pipe_slow );
24581 %}
24582
24583 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24584 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24585 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24586 match(Set dst (CountLeadingZerosV src));
24587 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24588 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24589 ins_encode %{
24590 int vlen_enc = vector_length_encoding(this, $src);
24591 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24592 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24593 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24594 %}
24595 ins_pipe( pipe_slow );
24596 %}
24597
24598 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24599 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24600 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24601 match(Set dst (CountLeadingZerosV src));
24602 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24603 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24604 ins_encode %{
24605 int vlen_enc = vector_length_encoding(this, $src);
24606 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24607 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24608 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24609 %}
24610 ins_pipe( pipe_slow );
24611 %}
24612
24613 // ---------------------------------- Vector Masked Operations ------------------------------------
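// These forms use EVEX merge masking: lanes not set in $mask retain the value of the first (destination) input.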
24614
24615 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24616 match(Set dst (AddVB (Binary dst src2) mask));
24617 match(Set dst (AddVS (Binary dst src2) mask));
24618 match(Set dst (AddVI (Binary dst src2) mask));
24619 match(Set dst (AddVL (Binary dst src2) mask));
24620 match(Set dst (AddVF (Binary dst src2) mask));
24621 match(Set dst (AddVD (Binary dst src2) mask));
24622 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24623 ins_encode %{
24624 int vlen_enc = vector_length_encoding(this);
24625 BasicType bt = Matcher::vector_element_basic_type(this);
24626 int opc = this->ideal_Opcode();
24627 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24628 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24629 %}
24630 ins_pipe( pipe_slow );
24631 %}
24632
24633 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24634 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24635 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24636 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24637 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24638 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24639 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24640 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24641 ins_encode %{
24642 int vlen_enc = vector_length_encoding(this);
24643 BasicType bt = Matcher::vector_element_basic_type(this);
24644 int opc = this->ideal_Opcode();
24645 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24646 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24647 %}
24648 ins_pipe( pipe_slow );
24649 %}
24650
24651 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24652 match(Set dst (XorV (Binary dst src2) mask));
24653 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24654 ins_encode %{
24655 int vlen_enc = vector_length_encoding(this);
24656 BasicType bt = Matcher::vector_element_basic_type(this);
24657 int opc = this->ideal_Opcode();
24658 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24659 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24660 %}
24661 ins_pipe( pipe_slow );
24662 %}
24663
24664 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24665 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24666 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24667 ins_encode %{
24668 int vlen_enc = vector_length_encoding(this);
24669 BasicType bt = Matcher::vector_element_basic_type(this);
24670 int opc = this->ideal_Opcode();
24671 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24672 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24673 %}
24674 ins_pipe( pipe_slow );
24675 %}
24676
24677 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24678 match(Set dst (OrV (Binary dst src2) mask));
24679 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24680 ins_encode %{
24681 int vlen_enc = vector_length_encoding(this);
24682 BasicType bt = Matcher::vector_element_basic_type(this);
24683 int opc = this->ideal_Opcode();
24684 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24685 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24686 %}
24687 ins_pipe( pipe_slow );
24688 %}
24689
24690 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24691 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24692 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24693 ins_encode %{
24694 int vlen_enc = vector_length_encoding(this);
24695 BasicType bt = Matcher::vector_element_basic_type(this);
24696 int opc = this->ideal_Opcode();
24697 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24698 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24699 %}
24700 ins_pipe( pipe_slow );
24701 %}
24702
24703 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24704 match(Set dst (AndV (Binary dst src2) mask));
24705 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24706 ins_encode %{
24707 int vlen_enc = vector_length_encoding(this);
24708 BasicType bt = Matcher::vector_element_basic_type(this);
24709 int opc = this->ideal_Opcode();
24710 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24711 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24712 %}
24713 ins_pipe( pipe_slow );
24714 %}
24715
24716 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24717 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24718 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24719 ins_encode %{
24720 int vlen_enc = vector_length_encoding(this);
24721 BasicType bt = Matcher::vector_element_basic_type(this);
24722 int opc = this->ideal_Opcode();
24723 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24724 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24725 %}
24726 ins_pipe( pipe_slow );
24727 %}
24728
24729 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24730 match(Set dst (SubVB (Binary dst src2) mask));
24731 match(Set dst (SubVS (Binary dst src2) mask));
24732 match(Set dst (SubVI (Binary dst src2) mask));
24733 match(Set dst (SubVL (Binary dst src2) mask));
24734 match(Set dst (SubVF (Binary dst src2) mask));
24735 match(Set dst (SubVD (Binary dst src2) mask));
24736 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24737 ins_encode %{
24738 int vlen_enc = vector_length_encoding(this);
24739 BasicType bt = Matcher::vector_element_basic_type(this);
24740 int opc = this->ideal_Opcode();
24741 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24742 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24743 %}
24744 ins_pipe( pipe_slow );
24745 %}
24746
24747 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24748 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24749 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24750 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24751 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24752 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24753 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24754 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24755 ins_encode %{
24756 int vlen_enc = vector_length_encoding(this);
24757 BasicType bt = Matcher::vector_element_basic_type(this);
24758 int opc = this->ideal_Opcode();
24759 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24760 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24761 %}
24762 ins_pipe( pipe_slow );
24763 %}
24764
24765 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24766 match(Set dst (MulVS (Binary dst src2) mask));
24767 match(Set dst (MulVI (Binary dst src2) mask));
24768 match(Set dst (MulVL (Binary dst src2) mask));
24769 match(Set dst (MulVF (Binary dst src2) mask));
24770 match(Set dst (MulVD (Binary dst src2) mask));
24771 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24772 ins_encode %{
24773 int vlen_enc = vector_length_encoding(this);
24774 BasicType bt = Matcher::vector_element_basic_type(this);
24775 int opc = this->ideal_Opcode();
24776 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24777 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24778 %}
24779 ins_pipe( pipe_slow );
24780 %}
24781
24782 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24783 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24784 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24785 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24786 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24787 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24788 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24789 ins_encode %{
24790 int vlen_enc = vector_length_encoding(this);
24791 BasicType bt = Matcher::vector_element_basic_type(this);
24792 int opc = this->ideal_Opcode();
24793 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24794 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24795 %}
24796 ins_pipe( pipe_slow );
24797 %}
24798
24799 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24800 match(Set dst (SqrtVF dst mask));
24801 match(Set dst (SqrtVD dst mask));
24802 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24803 ins_encode %{
24804 int vlen_enc = vector_length_encoding(this);
24805 BasicType bt = Matcher::vector_element_basic_type(this);
24806 int opc = this->ideal_Opcode();
24807 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24808 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24809 %}
24810 ins_pipe( pipe_slow );
24811 %}
24812
24813 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24814 match(Set dst (DivVF (Binary dst src2) mask));
24815 match(Set dst (DivVD (Binary dst src2) mask));
24816 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24817 ins_encode %{
24818 int vlen_enc = vector_length_encoding(this);
24819 BasicType bt = Matcher::vector_element_basic_type(this);
24820 int opc = this->ideal_Opcode();
24821 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24822 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24823 %}
24824 ins_pipe( pipe_slow );
24825 %}
24826
24827 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24828 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24829 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24830 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24831 ins_encode %{
24832 int vlen_enc = vector_length_encoding(this);
24833 BasicType bt = Matcher::vector_element_basic_type(this);
24834 int opc = this->ideal_Opcode();
24835 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24836 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24837 %}
24838 ins_pipe( pipe_slow );
24839 %}
24840
24841
24842 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24843 match(Set dst (RotateLeftV (Binary dst shift) mask));
24844 match(Set dst (RotateRightV (Binary dst shift) mask));
24845 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24846 ins_encode %{
24847 int vlen_enc = vector_length_encoding(this);
24848 BasicType bt = Matcher::vector_element_basic_type(this);
24849 int opc = this->ideal_Opcode();
24850 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24851 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24852 %}
24853 ins_pipe( pipe_slow );
24854 %}
24855
24856 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24857 match(Set dst (RotateLeftV (Binary dst src2) mask));
24858 match(Set dst (RotateRightV (Binary dst src2) mask));
24859 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24860 ins_encode %{
24861 int vlen_enc = vector_length_encoding(this);
24862 BasicType bt = Matcher::vector_element_basic_type(this);
24863 int opc = this->ideal_Opcode();
24864 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24865 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24866 %}
24867 ins_pipe( pipe_slow );
24868 %}
24869
24870 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24871 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24872 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24873 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24874 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24875 ins_encode %{
24876 int vlen_enc = vector_length_encoding(this);
24877 BasicType bt = Matcher::vector_element_basic_type(this);
24878 int opc = this->ideal_Opcode();
24879 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24880 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24881 %}
24882 ins_pipe( pipe_slow );
24883 %}
24884
24885 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24886 predicate(!n->as_ShiftV()->is_var_shift());
24887 match(Set dst (LShiftVS (Binary dst src2) mask));
24888 match(Set dst (LShiftVI (Binary dst src2) mask));
24889 match(Set dst (LShiftVL (Binary dst src2) mask));
24890 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24891 ins_encode %{
24892 int vlen_enc = vector_length_encoding(this);
24893 BasicType bt = Matcher::vector_element_basic_type(this);
24894 int opc = this->ideal_Opcode();
24895 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24896 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24897 %}
24898 ins_pipe( pipe_slow );
24899 %}
24900
24901 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24902 predicate(n->as_ShiftV()->is_var_shift());
24903 match(Set dst (LShiftVS (Binary dst src2) mask));
24904 match(Set dst (LShiftVI (Binary dst src2) mask));
24905 match(Set dst (LShiftVL (Binary dst src2) mask));
24906 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24907 ins_encode %{
24908 int vlen_enc = vector_length_encoding(this);
24909 BasicType bt = Matcher::vector_element_basic_type(this);
24910 int opc = this->ideal_Opcode();
24911 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24912 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24913 %}
24914 ins_pipe( pipe_slow );
24915 %}
24916
24917 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24918 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24919 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24920 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24921 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24922 ins_encode %{
24923 int vlen_enc = vector_length_encoding(this);
24924 BasicType bt = Matcher::vector_element_basic_type(this);
24925 int opc = this->ideal_Opcode();
24926 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24927 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24928 %}
24929 ins_pipe( pipe_slow );
24930 %}
24931
24932 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24933 predicate(!n->as_ShiftV()->is_var_shift());
24934 match(Set dst (RShiftVS (Binary dst src2) mask));
24935 match(Set dst (RShiftVI (Binary dst src2) mask));
24936 match(Set dst (RShiftVL (Binary dst src2) mask));
24937 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24938 ins_encode %{
24939 int vlen_enc = vector_length_encoding(this);
24940 BasicType bt = Matcher::vector_element_basic_type(this);
24941 int opc = this->ideal_Opcode();
24942 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24943 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24944 %}
24945 ins_pipe( pipe_slow );
24946 %}
24947
24948 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24949 predicate(n->as_ShiftV()->is_var_shift());
24950 match(Set dst (RShiftVS (Binary dst src2) mask));
24951 match(Set dst (RShiftVI (Binary dst src2) mask));
24952 match(Set dst (RShiftVL (Binary dst src2) mask));
24953 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24954 ins_encode %{
24955 int vlen_enc = vector_length_encoding(this);
24956 BasicType bt = Matcher::vector_element_basic_type(this);
24957 int opc = this->ideal_Opcode();
24958 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24959 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24960 %}
24961 ins_pipe( pipe_slow );
24962 %}
24963
24964 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24965 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24966 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24967 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24968 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24969 ins_encode %{
24970 int vlen_enc = vector_length_encoding(this);
24971 BasicType bt = Matcher::vector_element_basic_type(this);
24972 int opc = this->ideal_Opcode();
24973 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24974 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24975 %}
24976 ins_pipe( pipe_slow );
24977 %}
24978
24979 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24980 predicate(!n->as_ShiftV()->is_var_shift());
24981 match(Set dst (URShiftVS (Binary dst src2) mask));
24982 match(Set dst (URShiftVI (Binary dst src2) mask));
24983 match(Set dst (URShiftVL (Binary dst src2) mask));
24984 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24985 ins_encode %{
24986 int vlen_enc = vector_length_encoding(this);
24987 BasicType bt = Matcher::vector_element_basic_type(this);
24988 int opc = this->ideal_Opcode();
24989 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24990 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24991 %}
24992 ins_pipe( pipe_slow );
24993 %}
24994
24995 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24996 predicate(n->as_ShiftV()->is_var_shift());
24997 match(Set dst (URShiftVS (Binary dst src2) mask));
24998 match(Set dst (URShiftVI (Binary dst src2) mask));
24999 match(Set dst (URShiftVL (Binary dst src2) mask));
25000 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25001 ins_encode %{
25002 int vlen_enc = vector_length_encoding(this);
25003 BasicType bt = Matcher::vector_element_basic_type(this);
25004 int opc = this->ideal_Opcode();
25005 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25006 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
25007 %}
25008 ins_pipe( pipe_slow );
25009 %}
25010
25011 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
25012 match(Set dst (MaxV (Binary dst src2) mask));
25013 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25014 ins_encode %{
25015 int vlen_enc = vector_length_encoding(this);
25016 BasicType bt = Matcher::vector_element_basic_type(this);
25017 int opc = this->ideal_Opcode();
25018 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25019 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25020 %}
25021 ins_pipe( pipe_slow );
25022 %}
25023
25024 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
25025 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
25026 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25027 ins_encode %{
25028 int vlen_enc = vector_length_encoding(this);
25029 BasicType bt = Matcher::vector_element_basic_type(this);
25030 int opc = this->ideal_Opcode();
25031 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25032 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25033 %}
25034 ins_pipe( pipe_slow );
25035 %}
25036
25037 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
25038 match(Set dst (MinV (Binary dst src2) mask));
25039 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25040 ins_encode %{
25041 int vlen_enc = vector_length_encoding(this);
25042 BasicType bt = Matcher::vector_element_basic_type(this);
25043 int opc = this->ideal_Opcode();
25044 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25045 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25046 %}
25047 ins_pipe( pipe_slow );
25048 %}
25049
25050 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
25051 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
25052 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25053 ins_encode %{
25054 int vlen_enc = vector_length_encoding(this);
25055 BasicType bt = Matcher::vector_element_basic_type(this);
25056 int opc = this->ideal_Opcode();
25057 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25058 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25059 %}
25060 ins_pipe( pipe_slow );
25061 %}
25062
25063 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
25064 match(Set dst (VectorRearrange (Binary dst src2) mask));
25065 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
25066 ins_encode %{
25067 int vlen_enc = vector_length_encoding(this);
25068 BasicType bt = Matcher::vector_element_basic_type(this);
25069 int opc = this->ideal_Opcode();
25070 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25071 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25072 %}
25073 ins_pipe( pipe_slow );
25074 %}
25075
25076 instruct vabs_masked(vec dst, kReg mask) %{
25077 match(Set dst (AbsVB dst mask));
25078 match(Set dst (AbsVS dst mask));
25079 match(Set dst (AbsVI dst mask));
25080 match(Set dst (AbsVL dst mask));
25081 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
25082 ins_encode %{
25083 int vlen_enc = vector_length_encoding(this);
25084 BasicType bt = Matcher::vector_element_basic_type(this);
25085 int opc = this->ideal_Opcode();
25086 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25087 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
25088 %}
25089 ins_pipe( pipe_slow );
25090 %}
25091
25092 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
25093 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
25094 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
25095 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25096 ins_encode %{
25097 assert(UseFMA, "Needs FMA instructions support.");
25098 int vlen_enc = vector_length_encoding(this);
25099 BasicType bt = Matcher::vector_element_basic_type(this);
25100 int opc = this->ideal_Opcode();
25101 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25102 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
25103 %}
25104 ins_pipe( pipe_slow );
25105 %}
25106
25107 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
25108 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
25109 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
25110 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25111 ins_encode %{
25112 assert(UseFMA, "Needs FMA instructions support.");
25113 int vlen_enc = vector_length_encoding(this);
25114 BasicType bt = Matcher::vector_element_basic_type(this);
25115 int opc = this->ideal_Opcode();
25116 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25117 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
25118 %}
25119 ins_pipe( pipe_slow );
25120 %}
25121
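// Predicated vector compare: the result is written to a mask register, and the incoming
// $mask acts as a write-mask, so only the lanes it selects can produce a set bit in $dst.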
25122 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
25123 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
25124 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
25125 ins_encode %{
25126 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
25127 int vlen_enc = vector_length_encoding(this, $src1);
25128 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
25129
    // Dispatch the comparison on the element type of src1.
25131 switch (src1_elem_bt) {
25132 case T_BYTE: {
25133 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25134 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25135 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25136 break;
25137 }
25138 case T_SHORT: {
25139 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25140 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25141 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25142 break;
25143 }
25144 case T_INT: {
25145 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25146 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25147 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25148 break;
25149 }
25150 case T_LONG: {
25151 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25152 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25153 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25154 break;
25155 }
25156 case T_FLOAT: {
25157 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25158 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25159 break;
25160 }
25161 case T_DOUBLE: {
25162 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25163 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25164 break;
25165 }
25166 default: assert(false, "%s", type2name(src1_elem_bt)); break;
25167 }
25168 %}
25169 ins_pipe( pipe_slow );
25170 %}
25171
25172 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25173 predicate(Matcher::vector_length(n) <= 32);
25174 match(Set dst (MaskAll src));
25175 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
25176 ins_encode %{
25177 int mask_len = Matcher::vector_length(this);
25178 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25179 %}
25180 ins_pipe( pipe_slow );
25181 %}
25182
25183 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25184 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25185 match(Set dst (XorVMask src (MaskAll cnt)));
25186 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25187 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25188 ins_encode %{
25189 uint masklen = Matcher::vector_length(this);
25190 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25191 %}
25192 ins_pipe( pipe_slow );
25193 %}
25194
25195 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25196 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25197 (Matcher::vector_length(n) == 16) ||
25198 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25199 match(Set dst (XorVMask src (MaskAll cnt)));
25200 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25201 ins_encode %{
25202 uint masklen = Matcher::vector_length(this);
25203 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25204 %}
25205 ins_pipe( pipe_slow );
25206 %}
25207
25208 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25209 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25210 match(Set dst (VectorLongToMask src));
25211 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25212 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25213 ins_encode %{
25214 int mask_len = Matcher::vector_length(this);
25215 int vec_enc = vector_length_encoding(mask_len);
25216 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25217 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25218 %}
25219 ins_pipe( pipe_slow );
25220 %}
25221
25222
25223 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25224 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25225 match(Set dst (VectorLongToMask src));
25226 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
25228 ins_encode %{
25229 int mask_len = Matcher::vector_length(this);
25230 assert(mask_len <= 32, "invalid mask length");
25231 int vec_enc = vector_length_encoding(mask_len);
25232 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25233 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25234 %}
25235 ins_pipe( pipe_slow );
25236 %}
25237
25238 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25239 predicate(n->bottom_type()->isa_vectmask());
25240 match(Set dst (VectorLongToMask src));
25241 format %{ "long_to_mask_evex $dst, $src\t!" %}
25242 ins_encode %{
25243 __ kmov($dst$$KRegister, $src$$Register);
25244 %}
25245 ins_pipe( pipe_slow );
25246 %}
25247
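// Logical operations (and/or/xor) on mask registers. The byte-granular k-register
// instructions require AVX512DQ, so without that feature mask lengths below 16 are
// widened to 16 and the word-sized forms are used instead.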
25248 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25249 match(Set dst (AndVMask src1 src2));
25250 match(Set dst (OrVMask src1 src2));
25251 match(Set dst (XorVMask src1 src2));
25252 effect(TEMP kscratch);
25253 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25254 ins_encode %{
25255 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25256 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25257 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25258 uint masklen = Matcher::vector_length(this);
25259 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25260 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25261 %}
25262 ins_pipe( pipe_slow );
25263 %}
25264
25265 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25266 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25267 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25268 ins_encode %{
25269 int vlen_enc = vector_length_encoding(this);
25270 BasicType bt = Matcher::vector_element_basic_type(this);
25271 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25272 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25273 %}
25274 ins_pipe( pipe_slow );
25275 %}
25276
25277 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25278 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25279 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25280 ins_encode %{
25281 int vlen_enc = vector_length_encoding(this);
25282 BasicType bt = Matcher::vector_element_basic_type(this);
25283 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25284 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25285 %}
25286 ins_pipe( pipe_slow );
25287 %}
25288
25289 instruct castMM(kReg dst)
25290 %{
25291 match(Set dst (CastVV dst));
25292
25293 size(0);
25294 format %{ "# castVV of $dst" %}
25295 ins_encode(/* empty encoding */);
25296 ins_cost(0);
25297 ins_pipe(empty);
25298 %}
25299
25300 instruct castVV(vec dst)
25301 %{
25302 match(Set dst (CastVV dst));
25303
25304 size(0);
25305 format %{ "# castVV of $dst" %}
25306 ins_encode(/* empty encoding */);
25307 ins_cost(0);
25308 ins_pipe(empty);
25309 %}
25310
25311 instruct castVVLeg(legVec dst)
25312 %{
25313 match(Set dst (CastVV dst));
25314
25315 size(0);
25316 format %{ "# castVV of $dst" %}
25317 ins_encode(/* empty encoding */);
25318 ins_cost(0);
25319 ins_pipe(empty);
25320 %}
25321
25322 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25323 %{
25324 match(Set dst (IsInfiniteF src));
25325 effect(TEMP ktmp, KILL cr);
25326 format %{ "float_class_check $dst, $src" %}
25327 ins_encode %{
25328 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25329 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25330 %}
25331 ins_pipe(pipe_slow);
25332 %}
25333
25334 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25335 %{
25336 match(Set dst (IsInfiniteD src));
25337 effect(TEMP ktmp, KILL cr);
25338 format %{ "double_class_check $dst, $src" %}
25339 ins_encode %{
25340 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25341 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25342 %}
25343 ins_pipe(pipe_slow);
25344 %}
25345
25346 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25347 %{
25348 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25349 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25350 match(Set dst (SaturatingAddV src1 src2));
25351 match(Set dst (SaturatingSubV src1 src2));
25352 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25353 ins_encode %{
25354 int vlen_enc = vector_length_encoding(this);
25355 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25356 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25357 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25358 %}
25359 ins_pipe(pipe_slow);
25360 %}
25361
25362 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25363 %{
25364 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25365 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25366 match(Set dst (SaturatingAddV src1 src2));
25367 match(Set dst (SaturatingSubV src1 src2));
25368 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25369 ins_encode %{
25370 int vlen_enc = vector_length_encoding(this);
25371 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25372 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25373 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25374 %}
25375 ins_pipe(pipe_slow);
25376 %}
25377
25378 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25379 %{
25380 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25381 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25382 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25383 match(Set dst (SaturatingAddV src1 src2));
25384 match(Set dst (SaturatingSubV src1 src2));
25385 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25386 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25387 ins_encode %{
25388 int vlen_enc = vector_length_encoding(this);
25389 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25390 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25391 $src1$$XMMRegister, $src2$$XMMRegister,
25392 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25393 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25394 %}
25395 ins_pipe(pipe_slow);
25396 %}
25397
25398 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25399 %{
25400 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25401 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25402 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25403 match(Set dst (SaturatingAddV src1 src2));
25404 match(Set dst (SaturatingSubV src1 src2));
25405 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25406 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25407 ins_encode %{
25408 int vlen_enc = vector_length_encoding(this);
25409 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25410 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25411 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25412 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25413 %}
25414 ins_pipe(pipe_slow);
25415 %}
25416
25417 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25418 %{
25419 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25420 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25421 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25422 match(Set dst (SaturatingAddV src1 src2));
25423 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25424 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25425 ins_encode %{
25426 int vlen_enc = vector_length_encoding(this);
25427 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25428 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25429 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25430 %}
25431 ins_pipe(pipe_slow);
25432 %}
25433
25434 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25435 %{
25436 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25437 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25438 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25439 match(Set dst (SaturatingAddV src1 src2));
25440 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25441 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25442 ins_encode %{
25443 int vlen_enc = vector_length_encoding(this);
25444 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25445 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25446 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25447 %}
25448 ins_pipe(pipe_slow);
25449 %}
25450
25451 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25452 %{
25453 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25454 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25455 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25456 match(Set dst (SaturatingSubV src1 src2));
25457 effect(TEMP ktmp);
25458 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25459 ins_encode %{
25460 int vlen_enc = vector_length_encoding(this);
25461 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25462 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25463 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25464 %}
25465 ins_pipe(pipe_slow);
25466 %}
25467
25468 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25469 %{
25470 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25471 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25472 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25473 match(Set dst (SaturatingSubV src1 src2));
25474 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25475 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25476 ins_encode %{
25477 int vlen_enc = vector_length_encoding(this);
25478 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25479 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25480 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25481 %}
25482 ins_pipe(pipe_slow);
25483 %}
25484
25485 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25486 %{
25487 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25488 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25489 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25490 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25491 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25492 ins_encode %{
25493 int vlen_enc = vector_length_encoding(this);
25494 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25495 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25496 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25497 %}
25498 ins_pipe(pipe_slow);
25499 %}
25500
25501 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25502 %{
25503 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25504 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25505 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25506 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25507 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25508 ins_encode %{
25509 int vlen_enc = vector_length_encoding(this);
25510 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25511 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25512 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25513 %}
25514 ins_pipe(pipe_slow);
25515 %}
25516
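// Masked (predicated) variants of the subword saturating add/sub above: the operation is
// applied only in the lanes selected by $mask, with $dst doubling as the first input.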
25517 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25518 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25519 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25520 match(Set dst (SaturatingAddV (Binary dst src) mask));
25521 match(Set dst (SaturatingSubV (Binary dst src) mask));
25522 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25523 ins_encode %{
25524 int vlen_enc = vector_length_encoding(this);
25525 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25526 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25527 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25528 %}
25529 ins_pipe( pipe_slow );
25530 %}
25531
25532 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25533 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25534 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25535 match(Set dst (SaturatingAddV (Binary dst src) mask));
25536 match(Set dst (SaturatingSubV (Binary dst src) mask));
25537 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25538 ins_encode %{
25539 int vlen_enc = vector_length_encoding(this);
25540 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25541 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25542 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25543 %}
25544 ins_pipe( pipe_slow );
25545 %}
25546
25547 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25548 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25549 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25550 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25551 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25552 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25553 ins_encode %{
25554 int vlen_enc = vector_length_encoding(this);
25555 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25556 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25557 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25558 %}
25559 ins_pipe( pipe_slow );
25560 %}
25561
25562 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25563 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25564 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25565 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25566 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25567 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25568 ins_encode %{
25569 int vlen_enc = vector_length_encoding(this);
25570 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25571 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25572 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25573 %}
25574 ins_pipe( pipe_slow );
25575 %}
25576
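// SelectFromTwoVector picks elements from the pair (src1, src2) using the per-lane
// indices held in $index; the index vector doubles as the destination, mirroring the
// two-table permute (vpermi2*-style) instruction semantics.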
25577 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25578 %{
25579 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25580 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25581 ins_encode %{
25582 int vlen_enc = vector_length_encoding(this);
25583 BasicType bt = Matcher::vector_element_basic_type(this);
25584 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25585 %}
25586 ins_pipe(pipe_slow);
25587 %}
25588
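// Scalar half-precision (Float16) operations. ReinterpretS2HF/ReinterpretHF2S move the
// raw 16-bit payload between a general purpose register and an XMM register via vmovw
// (AVX512-FP16); the rules below map the scalar FP16 arithmetic nodes onto their scalar
// FP16 instruction sequences.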
25589 instruct reinterpretS2HF(regF dst, rRegI src)
25590 %{
25591 match(Set dst (ReinterpretS2HF src));
25592 format %{ "vmovw $dst, $src" %}
25593 ins_encode %{
25594 __ vmovw($dst$$XMMRegister, $src$$Register);
25595 %}
25596 ins_pipe(pipe_slow);
25597 %}
25598
25599 instruct reinterpretHF2S(rRegI dst, regF src)
25600 %{
25601 match(Set dst (ReinterpretHF2S src));
25602 format %{ "vmovw $dst, $src" %}
25603 ins_encode %{
25604 __ vmovw($dst$$Register, $src$$XMMRegister);
25605 %}
25606 ins_pipe(pipe_slow);
25607 %}
25608
25609 instruct convF2HFAndS2HF(regF dst, regF src)
25610 %{
25611 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25612 format %{ "convF2HFAndS2HF $dst, $src" %}
25613 ins_encode %{
25614 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25615 %}
25616 ins_pipe(pipe_slow);
25617 %}
25618
25619 instruct convHF2SAndHF2F(regF dst, regF src)
25620 %{
25621 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25622 format %{ "convHF2SAndHF2F $dst, $src" %}
25623 ins_encode %{
25624 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25625 %}
25626 ins_pipe(pipe_slow);
25627 %}
25628
25629 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25630 %{
25631 match(Set dst (SqrtHF src));
25632 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25633 ins_encode %{
25634 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25635 %}
25636 ins_pipe(pipe_slow);
25637 %}
25638
25639 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25640 %{
25641 match(Set dst (AddHF src1 src2));
25642 match(Set dst (DivHF src1 src2));
25643 match(Set dst (MulHF src1 src2));
25644 match(Set dst (SubHF src1 src2));
25645 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25646 ins_encode %{
25647 int opcode = this->ideal_Opcode();
25648 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25649 %}
25650 ins_pipe(pipe_slow);
25651 %}
25652
25653 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25654 %{
25655 predicate(VM_Version::supports_avx10_2());
25656 match(Set dst (MaxHF src1 src2));
25657 match(Set dst (MinHF src1 src2));
25658 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25659 ins_encode %{
25660 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25661 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25662 %}
25663 ins_pipe( pipe_slow );
25664 %}
25665
25666 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25667 %{
25668 predicate(!VM_Version::supports_avx10_2());
25669 match(Set dst (MaxHF src1 src2));
25670 match(Set dst (MinHF src1 src2));
25671 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25672 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25673 ins_encode %{
25674 int opcode = this->ideal_Opcode();
25675 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25676 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25677 %}
25678 ins_pipe( pipe_slow );
25679 %}
25680
25681 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25682 %{
25683 match(Set dst (FmaHF src2 (Binary dst src1)));
25684 effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 scalar fma" %}
25686 ins_encode %{
25687 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25688 %}
25689 ins_pipe( pipe_slow );
25690 %}
25691
25692
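// Packed half-precision (Float16) vector operations, backed by packed AVX512-FP16
// instructions (vsqrtph, vaddph, vfmadd132ph, ...). The memory variants fold a
// reinterpreted vector load directly into the instruction's memory operand.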
25693 instruct vector_sqrt_HF_reg(vec dst, vec src)
25694 %{
25695 match(Set dst (SqrtVHF src));
25696 format %{ "vector_sqrt_fp16 $dst, $src" %}
25697 ins_encode %{
25698 int vlen_enc = vector_length_encoding(this);
25699 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25700 %}
25701 ins_pipe(pipe_slow);
25702 %}
25703
25704 instruct vector_sqrt_HF_mem(vec dst, memory src)
25705 %{
25706 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25707 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25708 ins_encode %{
25709 int vlen_enc = vector_length_encoding(this);
25710 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25711 %}
25712 ins_pipe(pipe_slow);
25713 %}
25714
25715 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25716 %{
25717 match(Set dst (AddVHF src1 src2));
25718 match(Set dst (DivVHF src1 src2));
25719 match(Set dst (MulVHF src1 src2));
25720 match(Set dst (SubVHF src1 src2));
25721 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25722 ins_encode %{
25723 int vlen_enc = vector_length_encoding(this);
25724 int opcode = this->ideal_Opcode();
25725 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25726 %}
25727 ins_pipe(pipe_slow);
25728 %}
25729
25730
25731 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25732 %{
25733 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25734 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25735 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25736 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25737 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25738 ins_encode %{
25739 int vlen_enc = vector_length_encoding(this);
25740 int opcode = this->ideal_Opcode();
25741 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25742 %}
25743 ins_pipe(pipe_slow);
25744 %}
25745
25746 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25747 %{
25748 match(Set dst (FmaVHF src2 (Binary dst src1)));
25749 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25750 ins_encode %{
25751 int vlen_enc = vector_length_encoding(this);
25752 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25753 %}
25754 ins_pipe( pipe_slow );
25755 %}
25756
25757 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25758 %{
25759 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25760 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25761 ins_encode %{
25762 int vlen_enc = vector_length_encoding(this);
25763 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25764 %}
25765 ins_pipe( pipe_slow );
25766 %}
25767
25768 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25769 %{
25770 predicate(VM_Version::supports_avx10_2());
25771 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25772 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25773 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25774 ins_encode %{
25775 int vlen_enc = vector_length_encoding(this);
25776 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25777 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25778 %}
25779 ins_pipe( pipe_slow );
25780 %}
25781
25782 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25783 %{
25784 predicate(VM_Version::supports_avx10_2());
25785 match(Set dst (MinVHF src1 src2));
25786 match(Set dst (MaxVHF src1 src2));
25787 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25788 ins_encode %{
25789 int vlen_enc = vector_length_encoding(this);
25790 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25791 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25792 %}
25793 ins_pipe( pipe_slow );
25794 %}
25795
25796 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25797 %{
25798 predicate(!VM_Version::supports_avx10_2());
25799 match(Set dst (MinVHF src1 src2));
25800 match(Set dst (MaxVHF src1 src2));
25801 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25802 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25803 ins_encode %{
25804 int vlen_enc = vector_length_encoding(this);
25805 int opcode = this->ideal_Opcode();
25806 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25807 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25808 %}
25809 ins_pipe( pipe_slow );
25810 %}
25811
25812 //----------PEEPHOLE RULES-----------------------------------------------------
25813 // These must follow all instruction definitions as they use the names
25814 // defined in the instructions definitions.
25815 //
25816 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate holds
25818 //
25819 // peepmatch ( root_instr_name [preceding_instruction]* );
25820 //
25821 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the procedure
// // should reside in the architecture-dependent peephole file and have the
// // signature MachNode* (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...),
// // the arguments being the basic block, the current node index inside the
// // block, the register allocator, the functions that, when invoked, return a new
// // node as defined in peepreplace, and the rules of the nodes appearing in the
// // corresponding peepmatch; the procedure returns true if successful, else
// // returns false
25830 //
25831 // peepconstraint %{
25832 // (instruction_number.operand_name relational_op instruction_number.operand_name
25833 // [, ...] );
25834 // // instruction numbers are zero-based using left to right order in peepmatch
25835 //
25836 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25837 // // provide an instruction_number.operand_name for each operand that appears
25838 // // in the replacement instruction's match rule
25839 //
25840 // ---------VM FLAGS---------------------------------------------------------
25841 //
25842 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25843 //
25844 // Each peephole rule is given an identifying number starting with zero and
25845 // increasing by one in the order seen by the parser. An individual peephole
25846 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25847 // on the command-line.
25848 //
25849 // ---------CURRENT LIMITATIONS----------------------------------------------
25850 //
// Only transformations inside a basic block (do we need more for peephole?)
25852 //
25853 // ---------EXAMPLE----------------------------------------------------------
25854 //
25855 // // pertinent parts of existing instructions in architecture description
25856 // instruct movI(rRegI dst, rRegI src)
25857 // %{
25858 // match(Set dst (CopyI src));
25859 // %}
25860 //
25861 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25862 // %{
25863 // match(Set dst (AddI dst src));
25864 // effect(KILL cr);
25865 // %}
25866 //
25867 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25868 // %{
25869 // match(Set dst (AddI dst src));
25870 // %}
25871 //
25872 // 1. Simple replacement
25873 // - Only match adjacent instructions in same basic block
25874 // - Only equality constraints
25875 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25876 // - Only one replacement instruction
25877 //
25878 // // Change (inc mov) to lea
25879 // peephole %{
25880 // // lea should only be emitted when beneficial
25881 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25882 // // increment preceded by register-register move
25883 // peepmatch ( incI_rReg movI );
25884 // // require that the destination register of the increment
25885 // // match the destination register of the move
25886 // peepconstraint ( 0.dst == 1.dst );
25887 // // construct a replacement instruction that sets
25888 // // the destination to ( move's source register + one )
25889 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25890 // %}
25891 //
25892 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25894 // - More flexible constraints
25895 // - More flexible transformations
25896 // - May utilise architecture-dependent API more effectively
25897 // - Currently only one replacement instruction due to adlc parsing capabilities
25898 //
25899 // // Change (inc mov) to lea
25900 // peephole %{
25901 // // lea should only be emitted when beneficial
25902 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25903 // // the rule numbers of these nodes inside are passed into the function below
25904 // peepmatch ( incI_rReg movI );
25905 // // the method that takes the responsibility of transformation
25906 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates this
// // node is passed into the function above
25909 // peepreplace ( leaI_rReg_immI() );
25910 // %}
25911
// These instructions are not matched by the matcher but are used by the peephole rules
25913 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25914 %{
25915 predicate(false);
25916 match(Set dst (AddI src1 src2));
25917 format %{ "leal $dst, [$src1 + $src2]" %}
25918 ins_encode %{
25919 Register dst = $dst$$Register;
25920 Register src1 = $src1$$Register;
25921 Register src2 = $src2$$Register;
25922 if (src1 != rbp && src1 != r13) {
25923 __ leal(dst, Address(src1, src2, Address::times_1));
25924 } else {
25925 assert(src2 != rbp && src2 != r13, "");
25926 __ leal(dst, Address(src2, src1, Address::times_1));
25927 }
25928 %}
25929 ins_pipe(ialu_reg_reg);
25930 %}
25931
25932 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25933 %{
25934 predicate(false);
25935 match(Set dst (AddI src1 src2));
25936 format %{ "leal $dst, [$src1 + $src2]" %}
25937 ins_encode %{
25938 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25939 %}
25940 ins_pipe(ialu_reg_reg);
25941 %}
25942
25943 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25944 %{
25945 predicate(false);
25946 match(Set dst (LShiftI src shift));
25947 format %{ "leal $dst, [$src << $shift]" %}
25948 ins_encode %{
25949 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25950 Register src = $src$$Register;
25951 if (scale == Address::times_2 && src != rbp && src != r13) {
25952 __ leal($dst$$Register, Address(src, src, Address::times_1));
25953 } else {
25954 __ leal($dst$$Register, Address(noreg, src, scale));
25955 }
25956 %}
25957 ins_pipe(ialu_reg_reg);
25958 %}
25959
25960 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25961 %{
25962 predicate(false);
25963 match(Set dst (AddL src1 src2));
25964 format %{ "leaq $dst, [$src1 + $src2]" %}
25965 ins_encode %{
25966 Register dst = $dst$$Register;
25967 Register src1 = $src1$$Register;
25968 Register src2 = $src2$$Register;
25969 if (src1 != rbp && src1 != r13) {
25970 __ leaq(dst, Address(src1, src2, Address::times_1));
25971 } else {
25972 assert(src2 != rbp && src2 != r13, "");
25973 __ leaq(dst, Address(src2, src1, Address::times_1));
25974 }
25975 %}
25976 ins_pipe(ialu_reg_reg);
25977 %}
25978
25979 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25980 %{
25981 predicate(false);
25982 match(Set dst (AddL src1 src2));
25983 format %{ "leaq $dst, [$src1 + $src2]" %}
25984 ins_encode %{
25985 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25986 %}
25987 ins_pipe(ialu_reg_reg);
25988 %}
25989
25990 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25991 %{
25992 predicate(false);
25993 match(Set dst (LShiftL src shift));
25994 format %{ "leaq $dst, [$src << $shift]" %}
25995 ins_encode %{
25996 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25997 Register src = $src$$Register;
25998 if (scale == Address::times_2 && src != rbp && src != r13) {
25999 __ leaq($dst$$Register, Address(src, src, Address::times_1));
26000 } else {
26001 __ leaq($dst$$Register, Address(noreg, src, scale));
26002 }
26003 %}
26004 ins_pipe(ialu_reg_reg);
26005 %}
26006
26007 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
26008 // sal}) with lea instructions. The {add, sal} rules are beneficial in
26009 // processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally
// beneficial only for processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and for Intel Cascade Lake.
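//
// As an illustrative sketch (not literal compiler output), the pair
//
//   movl rdx, rax      // movI
//   addl rdx, rcx      // addI_rReg
//
// can be coalesced by the lea_coalesce_reg procedure into the single instruction
//
//   leal rdx, [rax + rcx]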
26013
26014 peephole
26015 %{
26016 peeppredicate(VM_Version::supports_fast_2op_lea());
26017 peepmatch (addI_rReg);
26018 peepprocedure (lea_coalesce_reg);
26019 peepreplace (leaI_rReg_rReg_peep());
26020 %}
26021
26022 peephole
26023 %{
26024 peeppredicate(VM_Version::supports_fast_2op_lea());
26025 peepmatch (addI_rReg_imm);
26026 peepprocedure (lea_coalesce_imm);
26027 peepreplace (leaI_rReg_immI_peep());
26028 %}
26029
26030 peephole
26031 %{
26032 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26033 VM_Version::is_intel_cascade_lake());
26034 peepmatch (incI_rReg);
26035 peepprocedure (lea_coalesce_imm);
26036 peepreplace (leaI_rReg_immI_peep());
26037 %}
26038
26039 peephole
26040 %{
26041 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26042 VM_Version::is_intel_cascade_lake());
26043 peepmatch (decI_rReg);
26044 peepprocedure (lea_coalesce_imm);
26045 peepreplace (leaI_rReg_immI_peep());
26046 %}
26047
26048 peephole
26049 %{
26050 peeppredicate(VM_Version::supports_fast_2op_lea());
26051 peepmatch (salI_rReg_immI2);
26052 peepprocedure (lea_coalesce_imm);
26053 peepreplace (leaI_rReg_immI2_peep());
26054 %}
26055
26056 peephole
26057 %{
26058 peeppredicate(VM_Version::supports_fast_2op_lea());
26059 peepmatch (addL_rReg);
26060 peepprocedure (lea_coalesce_reg);
26061 peepreplace (leaL_rReg_rReg_peep());
26062 %}
26063
26064 peephole
26065 %{
26066 peeppredicate(VM_Version::supports_fast_2op_lea());
26067 peepmatch (addL_rReg_imm);
26068 peepprocedure (lea_coalesce_imm);
26069 peepreplace (leaL_rReg_immL32_peep());
26070 %}
26071
26072 peephole
26073 %{
26074 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26075 VM_Version::is_intel_cascade_lake());
26076 peepmatch (incL_rReg);
26077 peepprocedure (lea_coalesce_imm);
26078 peepreplace (leaL_rReg_immL32_peep());
26079 %}
26080
26081 peephole
26082 %{
26083 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26084 VM_Version::is_intel_cascade_lake());
26085 peepmatch (decL_rReg);
26086 peepprocedure (lea_coalesce_imm);
26087 peepreplace (leaL_rReg_immL32_peep());
26088 %}
26089
26090 peephole
26091 %{
26092 peeppredicate(VM_Version::supports_fast_2op_lea());
26093 peepmatch (salL_rReg_immI2);
26094 peepprocedure (lea_coalesce_imm);
26095 peepreplace (leaL_rReg_immI2_peep());
26096 %}
26097
26098 peephole
26099 %{
26100 peepmatch (leaPCompressedOopOffset);
26101 peepprocedure (lea_remove_redundant);
26102 %}
26103
26104 peephole
26105 %{
26106 peepmatch (leaP8Narrow);
26107 peepprocedure (lea_remove_redundant);
26108 %}
26109
26110 peephole
26111 %{
26112 peepmatch (leaP32Narrow);
26113 peepprocedure (lea_remove_redundant);
26114 %}
26115
// These peephole rules match instructions that set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only use flags that are already set by the preceding instruction.
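//
// As an illustrative sketch, in the sequence
//
//   andl  rax, rcx     // already sets ZF/SF from its result
//   testl rax, rax     // testI_reg, recomputes the same flags
//   je    done
//
// the testl can be removed because the branch only consumes flags that the andl has
// already produced.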
26118
// int variant
26120 peephole
26121 %{
26122 peepmatch (testI_reg);
26123 peepprocedure (test_may_remove);
26124 %}
26125
// long variant
26127 peephole
26128 %{
26129 peepmatch (testL_reg);
26130 peepprocedure (test_may_remove);
26131 %}
26132
26133
26134 //----------SMARTSPILL RULES---------------------------------------------------
26135 // These must follow all instruction definitions as they use the names
26136 // defined in the instructions definitions.