1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
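//
// As a reading aid (a sketch, not a new definition): the first entry below,
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// declares RAX as save-on-call under both the Java and C calling conventions,
// spilled and reloaded as an int (Op_RegI), with hardware encoding 0, backed
// by the VMReg returned by rax->as_VMReg().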
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // Previously RBX, RSI, and RDI were set as save-on-entry for Java code,
67 // but SOE was turned off in Java code due to frequent use of uncommon traps.
68 // Now that the allocator is better, RSI and RDI are turned back on as SOE registers.
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
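//
// Note (explanatory, not normative): the chunk0 ordering below follows this
// heuristic; freely usable temporaries such as R10 and R11 come first, while
// RSP, which participates in every call and frame, is listed last.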
188
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No XMM registers preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
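//
// Reading aid (illustrative only): a Float allocated to xmm3 occupies the
// single slot XMM3, a Double occupies the pair XMM3/XMM3b, and a full
// 512-bit vector spans all sixteen slots XMM3 through XMM3p.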
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
813 // Empty register class.
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
851 reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
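
// Note (descriptive, not normative): classes such as any_reg above return a
// mask (_ANY_REG_mask) that is computed in accompanying C++ code rather than
// being a fixed register list; presumably this lets membership, e.g. whether
// the APX GPRs R16-R31 are usable, be decided at VM startup.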
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
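// reg_class_dynamic picks between two statically defined register classes at
// runtime: the first class is used when the predicate evaluates to true, the
// second otherwise. The EVEX classes below (XMM0-XMM31) are therefore only
// used when the CPU actually supports the required AVX-512 features.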
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
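// Returns true when each bound of the CastLL's long type either fits in a
// signed 32-bit immediate or is unbounded (min_jlong/max_jlong), so the range
// checks can be emitted with 32-bit immediates.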
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
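// With compressed oops, r12 is reserved as the heap base register and must be
// kept out of the allocatable masks built below.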
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
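  // r16-r31 are the APX extended GPRs; they are stripped from the masks below
  // unless UseAPX is enabled.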
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
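  // With PreserveFramePointer, rbp is pinned as the frame pointer and is not
  // available for allocation.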
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
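// vzeroupper encodes in 3 bytes (C5 F8 77), which is what clear_avx_size()
// adds to size estimates when the instruction will be emitted.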
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
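  // A direct call is opcode 0xE8 followed by a 32-bit displacement.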
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
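  // The 15 bytes cover a 10-byte movabs (inline cache word loaded into rax)
  // followed by a 5-byte call.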
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
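  // 13 = 10 (movabs of the entry point into r10) + 3 (REX-prefixed indirect call)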
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1683 }
1684
1685 // This could be in MacroAssembler but it's fairly C2 specific
1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1687 Label exit;
1688 __ jccb(Assembler::noParity, exit);
1689 __ pushf();
1690 //
1691 // comiss/ucomiss instructions set ZF,PF,CF flags and
1692 // zero OF,AF,SF for NaN values.
1693 // Fixup flags by zeroing ZF,PF so that compare of NaN
1694 // values returns 'less than' result (CF is set).
1695 // Leave the rest of flags unchanged.
1696 //
1697 // 7 6 5 4 3 2 1 0
1698 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1699 // 0 0 1 0 1 0 1 1 (0x2B)
1700 //
1701 __ andq(Address(rsp, 0), 0xffffff2b);
1702 __ popf();
1703 __ bind(exit);
1704 }
1705
1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // ucomiss/ucomisd set CF for 'below' and also for an unordered (NaN) result,
  // so the 'below' branch keeps dst at -1 for both 'less than' and NaN inputs;
  // otherwise dst becomes 0 (equal) or 1 (greater).
1709 Label done;
1710 __ movl(dst, -1);
1711 __ jcc(Assembler::below, done);
1712 __ setcc(Assembler::notEqual, dst);
1713 __ bind(done);
1714 }
1715
1716 enum FP_PREC {
1717 fp_prec_hlf,
1718 fp_prec_flt,
1719 fp_prec_dbl
1720 };
1721
1722 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1723 XMMRegister p, XMMRegister q) {
1724 if (pt == fp_prec_hlf) {
1725 __ evucomish(p, q);
1726 } else if (pt == fp_prec_flt) {
1727 __ ucomiss(p, q);
1728 } else {
1729 __ ucomisd(p, q);
1730 }
1731 }
1732
1733 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1734 XMMRegister dst, XMMRegister src, Register scratch) {
1735 if (pt == fp_prec_hlf) {
1736 __ movhlf(dst, src, scratch);
1737 } else if (pt == fp_prec_flt) {
1738 __ movflt(dst, src);
1739 } else {
1740 __ movdbl(dst, src);
1741 }
1742 }
1743
1744 // Math.min() # Math.max()
1745 // -----------------------------
1746 // (v)ucomis[h/s/d] #
1747 // ja -> b # a
1748 // jp -> NaN # NaN
1749 // jb -> a # b
1750 // je #
1751 // |-jz -> a | b # a & b
1752 // | -> a #
1753 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1754 XMMRegister a, XMMRegister b,
1755 XMMRegister xmmt, Register rt,
1756 bool min, enum FP_PREC pt) {
1757
1758 Label nan, zero, below, above, done;
1759
1760 emit_fp_ucom(masm, pt, a, b);
1761
1762 if (dst->encoding() != (min ? b : a)->encoding()) {
1763 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1764 } else {
1765 __ jccb(Assembler::above, done);
1766 }
1767
1768 __ jccb(Assembler::parity, nan); // PF=1
1769 __ jccb(Assembler::below, below); // CF=1
1770
1771 // equal
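  // a == b: compare a against +0.0 to tell the +/-0.0 case apart from an
  // ordinary equal pair.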
1772 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1773 emit_fp_ucom(masm, pt, a, xmmt);
1774
1775 __ jccb(Assembler::equal, zero);
1776 movfp(masm, pt, dst, a, rt);
1777
1778 __ jmp(done);
1779
1780 __ bind(zero);
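  // Both inputs are zeros of possibly different signs: OR the bit patterns for
  // min so that -0.0 wins, AND them for max so that +0.0 wins.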
1781 if (min) {
1782 __ vpor(dst, a, b, Assembler::AVX_128bit);
1783 } else {
1784 __ vpand(dst, a, b, Assembler::AVX_128bit);
1785 }
1786
1787 __ jmp(done);
1788
1789 __ bind(above);
1790 movfp(masm, pt, dst, min ? b : a, rt);
1791
1792 __ jmp(done);
1793
1794 __ bind(nan);
1795 if (pt == fp_prec_hlf) {
1796 __ movl(rt, 0x00007e00); // Float16.NaN
1797 __ evmovw(dst, rt);
1798 } else if (pt == fp_prec_flt) {
1799 __ movl(rt, 0x7fc00000); // Float.NaN
1800 __ movdl(dst, rt);
1801 } else {
1802 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1803 __ movdq(dst, rt);
1804 }
1805 __ jmp(done);
1806
1807 __ bind(below);
1808 movfp(masm, pt, dst, min ? a : b, rt);
1809
1810 __ bind(done);
1811 }
1812
1813 //=============================================================================
1814 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1815
1816 int ConstantTable::calculate_table_base_offset() const {
1817 return 0; // absolute addressing, no offset
1818 }
1819
1820 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1821 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1822 ShouldNotReachHere();
1823 }
1824
1825 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1826 // Empty encoding
1827 }
1828
1829 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1830 return 0;
1831 }
1832
1833 #ifndef PRODUCT
1834 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1835 st->print("# MachConstantBaseNode (empty encoding)");
1836 }
1837 #endif
1838
1839
1840 //=============================================================================
1841 #ifndef PRODUCT
1842 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1843 Compile* C = ra_->C;
1844
1845 int framesize = C->output()->frame_size_in_bytes();
1846 int bangsize = C->output()->bang_size_in_bytes();
1847 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1848 // Remove wordSize for return addr which is already pushed.
1849 framesize -= wordSize;
1850
1851 if (C->output()->need_stack_bang(bangsize)) {
1852 framesize -= wordSize;
1853 st->print("# stack bang (%d bytes)", bangsize);
1854 st->print("\n\t");
1855 st->print("pushq rbp\t# Save rbp");
1856 if (PreserveFramePointer) {
1857 st->print("\n\t");
1858 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1859 }
1860 if (framesize) {
1861 st->print("\n\t");
1862 st->print("subq rsp, #%d\t# Create frame",framesize);
1863 }
1864 } else {
1865 st->print("subq rsp, #%d\t# Create frame",framesize);
1866 st->print("\n\t");
1867 framesize -= wordSize;
1868 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1869 if (PreserveFramePointer) {
1870 st->print("\n\t");
1871 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1872 if (framesize > 0) {
1873 st->print("\n\t");
1874 st->print("addq rbp, #%d", framesize);
1875 }
1876 }
1877 }
1878
1879 if (VerifyStackAtCalls) {
1880 st->print("\n\t");
1881 framesize -= wordSize;
1882 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1883 #ifdef ASSERT
1884 st->print("\n\t");
1885 st->print("# stack alignment check");
1886 #endif
1887 }
1888 if (C->stub_function() != nullptr) {
1889 st->print("\n\t");
1890 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1891 st->print("\n\t");
1892 st->print("je fast_entry\t");
1893 st->print("\n\t");
1894 st->print("call #nmethod_entry_barrier_stub\t");
1895 st->print("\n\tfast_entry:");
1896 }
1897 st->cr();
1898 }
1899 #endif
1900
1901 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1902 Compile* C = ra_->C;
1903
1904 __ verified_entry(C);
1905
1906 if (ra_->C->stub_function() == nullptr) {
1907 __ entry_barrier();
1908 }
1909
1910 if (!Compile::current()->output()->in_scratch_emit_size()) {
1911 __ bind(*_verified_entry);
1912 }
1913
1914 C->output()->set_frame_complete(__ offset());
1915
1916 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
1919 ConstantTable& constant_table = C->output()->constant_table();
1920 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1921 }
1922 }
1923
1924
1925 int MachPrologNode::reloc() const
1926 {
1927 return 0; // a large enough number
1928 }
1929
1930 //=============================================================================
1931 #ifndef PRODUCT
1932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1933 {
1934 Compile* C = ra_->C;
1935 if (generate_vzeroupper(C)) {
1936 st->print("vzeroupper");
1937 st->cr(); st->print("\t");
1938 }
1939
1940 int framesize = C->output()->frame_size_in_bytes();
1941 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1942 // Remove word for return adr already pushed
1943 // and RBP
1944 framesize -= 2*wordSize;
1945
1946 if (framesize) {
1947 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1948 st->print("\t");
1949 }
1950
1951 st->print_cr("popq rbp");
1952 if (do_polling() && C->is_method_compilation()) {
1953 st->print("\t");
1954 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1955 "ja #safepoint_stub\t"
1956 "# Safepoint: poll for GC");
1957 }
1958 }
1959 #endif
1960
1961 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1962 {
1963 Compile* C = ra_->C;
1964
1965 if (generate_vzeroupper(C)) {
1966 // Clear upper bits of YMM registers when current compiled code uses
1967 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1968 __ vzeroupper();
1969 }
1970
1971 // Subtract two words to account for return address and rbp
1972 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1973 __ remove_frame(initial_framesize, C->needs_stack_repair());
1974
1975 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1976 __ reserved_stack_check();
1977 }
1978
1979 if (do_polling() && C->is_method_compilation()) {
1980 Label dummy_label;
1981 Label* code_stub = &dummy_label;
1982 if (!C->output()->in_scratch_emit_size()) {
1983 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1984 C->output()->add_stub(stub);
1985 code_stub = &stub->entry();
1986 }
1987 __ relocate(relocInfo::poll_return_type);
1988 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1989 }
1990 }
1991
1992 int MachEpilogNode::reloc() const
1993 {
1994 return 2; // a large enough number
1995 }
1996
1997 const Pipeline* MachEpilogNode::pipeline() const
1998 {
1999 return MachNode::pipeline_class();
2000 }
2001
2002 //=============================================================================
2003
2004 enum RC {
2005 rc_bad,
2006 rc_int,
2007 rc_kreg,
2008 rc_float,
2009 rc_stack
2010 };
2011
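// Map an OptoReg to the coarse register class used by the spill-copy logic below.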
2012 static enum RC rc_class(OptoReg::Name reg)
2013 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2015
2016 if (OptoReg::is_stack(reg)) return rc_stack;
2017
2018 VMReg r = OptoReg::as_VMReg(reg);
2019
2020 if (r->is_Register()) return rc_int;
2021
2022 if (r->is_KRegister()) return rc_kreg;
2023
2024 assert(r->is_XMMRegister(), "must be");
2025 return rc_float;
2026 }
2027
2028 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2029 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2030 int src_hi, int dst_hi, uint ireg, outputStream* st);
2031
2032 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2033 int stack_offset, int reg, uint ireg, outputStream* st);
2034
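// Copy a vector spill slot to another stack slot. Small vectors go through rax
// or push/pop pairs; 256/512-bit vectors borrow xmm0 and park its old contents
// in the unused area just below rsp.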
2035 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2036 int dst_offset, uint ireg, outputStream* st) {
2037 if (masm) {
2038 switch (ireg) {
2039 case Op_VecS:
2040 __ movq(Address(rsp, -8), rax);
2041 __ movl(rax, Address(rsp, src_offset));
2042 __ movl(Address(rsp, dst_offset), rax);
2043 __ movq(rax, Address(rsp, -8));
2044 break;
2045 case Op_VecD:
2046 __ pushq(Address(rsp, src_offset));
2047 __ popq (Address(rsp, dst_offset));
2048 break;
2049 case Op_VecX:
2050 __ pushq(Address(rsp, src_offset));
2051 __ popq (Address(rsp, dst_offset));
2052 __ pushq(Address(rsp, src_offset+8));
2053 __ popq (Address(rsp, dst_offset+8));
2054 break;
2055 case Op_VecY:
2056 __ vmovdqu(Address(rsp, -32), xmm0);
2057 __ vmovdqu(xmm0, Address(rsp, src_offset));
2058 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2059 __ vmovdqu(xmm0, Address(rsp, -32));
2060 break;
2061 case Op_VecZ:
2062 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2063 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2064 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2065 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2066 break;
2067 default:
2068 ShouldNotReachHere();
2069 }
2070 #ifndef PRODUCT
2071 } else {
2072 switch (ireg) {
2073 case Op_VecS:
2074 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2075 "movl rax, [rsp + #%d]\n\t"
2076 "movl [rsp + #%d], rax\n\t"
2077 "movq rax, [rsp - #8]",
2078 src_offset, dst_offset);
2079 break;
2080 case Op_VecD:
2081 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2082 "popq [rsp + #%d]",
2083 src_offset, dst_offset);
2084 break;
2085 case Op_VecX:
2086 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2087 "popq [rsp + #%d]\n\t"
2088 "pushq [rsp + #%d]\n\t"
2089 "popq [rsp + #%d]",
2090 src_offset, dst_offset, src_offset+8, dst_offset+8);
2091 break;
2092 case Op_VecY:
2093 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2094 "vmovdqu xmm0, [rsp + #%d]\n\t"
2095 "vmovdqu [rsp + #%d], xmm0\n\t"
2096 "vmovdqu xmm0, [rsp - #32]",
2097 src_offset, dst_offset);
2098 break;
2099 case Op_VecZ:
2100 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2101 "vmovdqu xmm0, [rsp + #%d]\n\t"
2102 "vmovdqu [rsp + #%d], xmm0\n\t"
2103 "vmovdqu xmm0, [rsp - #64]",
2104 src_offset, dst_offset);
2105 break;
2106 default:
2107 ShouldNotReachHere();
2108 }
2109 #endif
2110 }
2111 }
2112
2113 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2114 PhaseRegAlloc* ra_,
2115 bool do_size,
2116 outputStream* st) const {
2117 assert(masm != nullptr || st != nullptr, "sanity");
2118 // Get registers to move
2119 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2120 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2121 OptoReg::Name dst_second = ra_->get_reg_second(this);
2122 OptoReg::Name dst_first = ra_->get_reg_first(this);
2123
2124 enum RC src_second_rc = rc_class(src_second);
2125 enum RC src_first_rc = rc_class(src_first);
2126 enum RC dst_second_rc = rc_class(dst_second);
2127 enum RC dst_first_rc = rc_class(dst_first);
2128
2129 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2130 "must move at least 1 register" );
2131
2132 if (src_first == dst_first && src_second == dst_second) {
2133 // Self copy, no move
2134 return 0;
2135 }
2136 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2137 uint ireg = ideal_reg();
2138 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2139 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2141 // mem -> mem
2142 int src_offset = ra_->reg2offset(src_first);
2143 int dst_offset = ra_->reg2offset(dst_first);
2144 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2145 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2146 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2147 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2148 int stack_offset = ra_->reg2offset(dst_first);
2149 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2150 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2151 int stack_offset = ra_->reg2offset(src_first);
2152 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2153 } else {
2154 ShouldNotReachHere();
2155 }
2156 return 0;
2157 }
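  // For the scalar cases below, a 64-bit value is recognized as an even-aligned
  // adjacent register pair: (first & 1) == 0 && first + 1 == second.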
2158 if (src_first_rc == rc_stack) {
2159 // mem ->
2160 if (dst_first_rc == rc_stack) {
2161 // mem -> mem
2162 assert(src_second != dst_first, "overlap");
2163 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2164 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2165 // 64-bit
2166 int src_offset = ra_->reg2offset(src_first);
2167 int dst_offset = ra_->reg2offset(dst_first);
2168 if (masm) {
2169 __ pushq(Address(rsp, src_offset));
2170 __ popq (Address(rsp, dst_offset));
2171 #ifndef PRODUCT
2172 } else {
2173 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2174 "popq [rsp + #%d]",
2175 src_offset, dst_offset);
2176 #endif
2177 }
2178 } else {
2179 // 32-bit
2180 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2181 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2182 // No pushl/popl, so:
2183 int src_offset = ra_->reg2offset(src_first);
2184 int dst_offset = ra_->reg2offset(dst_first);
2185 if (masm) {
2186 __ movq(Address(rsp, -8), rax);
2187 __ movl(rax, Address(rsp, src_offset));
2188 __ movl(Address(rsp, dst_offset), rax);
2189 __ movq(rax, Address(rsp, -8));
2190 #ifndef PRODUCT
2191 } else {
2192 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2193 "movl rax, [rsp + #%d]\n\t"
2194 "movl [rsp + #%d], rax\n\t"
2195 "movq rax, [rsp - #8]",
2196 src_offset, dst_offset);
2197 #endif
2198 }
2199 }
2200 return 0;
2201 } else if (dst_first_rc == rc_int) {
2202 // mem -> gpr
2203 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2204 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2205 // 64-bit
2206 int offset = ra_->reg2offset(src_first);
2207 if (masm) {
2208 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2209 #ifndef PRODUCT
2210 } else {
2211 st->print("movq %s, [rsp + #%d]\t# spill",
2212 Matcher::regName[dst_first],
2213 offset);
2214 #endif
2215 }
2216 } else {
2217 // 32-bit
2218 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2219 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2220 int offset = ra_->reg2offset(src_first);
2221 if (masm) {
2222 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2223 #ifndef PRODUCT
2224 } else {
2225 st->print("movl %s, [rsp + #%d]\t# spill",
2226 Matcher::regName[dst_first],
2227 offset);
2228 #endif
2229 }
2230 }
2231 return 0;
2232 } else if (dst_first_rc == rc_float) {
2233 // mem-> xmm
2234 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2235 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2236 // 64-bit
2237 int offset = ra_->reg2offset(src_first);
2238 if (masm) {
2239 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2240 #ifndef PRODUCT
2241 } else {
2242 st->print("%s %s, [rsp + #%d]\t# spill",
2243 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2244 Matcher::regName[dst_first],
2245 offset);
2246 #endif
2247 }
2248 } else {
2249 // 32-bit
2250 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2251 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2252 int offset = ra_->reg2offset(src_first);
2253 if (masm) {
2254 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2255 #ifndef PRODUCT
2256 } else {
2257 st->print("movss %s, [rsp + #%d]\t# spill",
2258 Matcher::regName[dst_first],
2259 offset);
2260 #endif
2261 }
2262 }
2263 return 0;
2264 } else if (dst_first_rc == rc_kreg) {
2265 // mem -> kreg
2266 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2267 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2268 // 64-bit
2269 int offset = ra_->reg2offset(src_first);
2270 if (masm) {
2271 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2272 #ifndef PRODUCT
2273 } else {
2274 st->print("kmovq %s, [rsp + #%d]\t# spill",
2275 Matcher::regName[dst_first],
2276 offset);
2277 #endif
2278 }
2279 }
2280 return 0;
2281 }
2282 } else if (src_first_rc == rc_int) {
2283 // gpr ->
2284 if (dst_first_rc == rc_stack) {
2285 // gpr -> mem
2286 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2287 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2288 // 64-bit
2289 int offset = ra_->reg2offset(dst_first);
2290 if (masm) {
2291 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2292 #ifndef PRODUCT
2293 } else {
2294 st->print("movq [rsp + #%d], %s\t# spill",
2295 offset,
2296 Matcher::regName[src_first]);
2297 #endif
2298 }
2299 } else {
2300 // 32-bit
2301 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2302 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2303 int offset = ra_->reg2offset(dst_first);
2304 if (masm) {
2305 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2306 #ifndef PRODUCT
2307 } else {
2308 st->print("movl [rsp + #%d], %s\t# spill",
2309 offset,
2310 Matcher::regName[src_first]);
2311 #endif
2312 }
2313 }
2314 return 0;
2315 } else if (dst_first_rc == rc_int) {
2316 // gpr -> gpr
2317 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2318 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2319 // 64-bit
2320 if (masm) {
2321 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2322 as_Register(Matcher::_regEncode[src_first]));
2323 #ifndef PRODUCT
2324 } else {
2325 st->print("movq %s, %s\t# spill",
2326 Matcher::regName[dst_first],
2327 Matcher::regName[src_first]);
2328 #endif
2329 }
2330 return 0;
2331 } else {
2332 // 32-bit
2333 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2334 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2335 if (masm) {
2336 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2337 as_Register(Matcher::_regEncode[src_first]));
2338 #ifndef PRODUCT
2339 } else {
2340 st->print("movl %s, %s\t# spill",
2341 Matcher::regName[dst_first],
2342 Matcher::regName[src_first]);
2343 #endif
2344 }
2345 return 0;
2346 }
2347 } else if (dst_first_rc == rc_float) {
2348 // gpr -> xmm
2349 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2350 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2351 // 64-bit
2352 if (masm) {
2353 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2354 #ifndef PRODUCT
2355 } else {
2356 st->print("movdq %s, %s\t# spill",
2357 Matcher::regName[dst_first],
2358 Matcher::regName[src_first]);
2359 #endif
2360 }
2361 } else {
2362 // 32-bit
2363 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2364 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2365 if (masm) {
2366 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2367 #ifndef PRODUCT
2368 } else {
2369 st->print("movdl %s, %s\t# spill",
2370 Matcher::regName[dst_first],
2371 Matcher::regName[src_first]);
2372 #endif
2373 }
2374 }
2375 return 0;
2376 } else if (dst_first_rc == rc_kreg) {
2377 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2378 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2379 // 64-bit
2380 if (masm) {
2381 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2382 #ifndef PRODUCT
2383 } else {
2384 st->print("kmovq %s, %s\t# spill",
2385 Matcher::regName[dst_first],
2386 Matcher::regName[src_first]);
2387 #endif
2388 }
2389 }
2390 Unimplemented();
2391 return 0;
2392 }
2393 } else if (src_first_rc == rc_float) {
2394 // xmm ->
2395 if (dst_first_rc == rc_stack) {
2396 // xmm -> mem
2397 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2398 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2399 // 64-bit
2400 int offset = ra_->reg2offset(dst_first);
2401 if (masm) {
2402 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2403 #ifndef PRODUCT
2404 } else {
2405 st->print("movsd [rsp + #%d], %s\t# spill",
2406 offset,
2407 Matcher::regName[src_first]);
2408 #endif
2409 }
2410 } else {
2411 // 32-bit
2412 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2413 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2414 int offset = ra_->reg2offset(dst_first);
2415 if (masm) {
2416 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2417 #ifndef PRODUCT
2418 } else {
2419 st->print("movss [rsp + #%d], %s\t# spill",
2420 offset,
2421 Matcher::regName[src_first]);
2422 #endif
2423 }
2424 }
2425 return 0;
2426 } else if (dst_first_rc == rc_int) {
2427 // xmm -> gpr
2428 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2429 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2430 // 64-bit
2431 if (masm) {
2432 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2433 #ifndef PRODUCT
2434 } else {
2435 st->print("movdq %s, %s\t# spill",
2436 Matcher::regName[dst_first],
2437 Matcher::regName[src_first]);
2438 #endif
2439 }
2440 } else {
2441 // 32-bit
2442 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2443 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2444 if (masm) {
2445 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2446 #ifndef PRODUCT
2447 } else {
2448 st->print("movdl %s, %s\t# spill",
2449 Matcher::regName[dst_first],
2450 Matcher::regName[src_first]);
2451 #endif
2452 }
2453 }
2454 return 0;
2455 } else if (dst_first_rc == rc_float) {
2456 // xmm -> xmm
2457 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2458 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2459 // 64-bit
2460 if (masm) {
2461 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2462 #ifndef PRODUCT
2463 } else {
2464 st->print("%s %s, %s\t# spill",
2465 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2466 Matcher::regName[dst_first],
2467 Matcher::regName[src_first]);
2468 #endif
2469 }
2470 } else {
2471 // 32-bit
2472 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2473 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2474 if (masm) {
2475 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2476 #ifndef PRODUCT
2477 } else {
2478 st->print("%s %s, %s\t# spill",
2479 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2480 Matcher::regName[dst_first],
2481 Matcher::regName[src_first]);
2482 #endif
2483 }
2484 }
2485 return 0;
2486 } else if (dst_first_rc == rc_kreg) {
2487 assert(false, "Illegal spilling");
2488 return 0;
2489 }
2490 } else if (src_first_rc == rc_kreg) {
2491 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2493 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2494 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2495 // 64-bit
2496 int offset = ra_->reg2offset(dst_first);
2497 if (masm) {
2498 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2499 #ifndef PRODUCT
2500 } else {
2501 st->print("kmovq [rsp + #%d] , %s\t# spill",
2502 offset,
2503 Matcher::regName[src_first]);
2504 #endif
2505 }
2506 }
2507 return 0;
2508 } else if (dst_first_rc == rc_int) {
2509 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2510 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2511 // 64-bit
2512 if (masm) {
2513 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2514 #ifndef PRODUCT
2515 } else {
2516 st->print("kmovq %s, %s\t# spill",
2517 Matcher::regName[dst_first],
2518 Matcher::regName[src_first]);
2519 #endif
2520 }
2521 }
2522 Unimplemented();
2523 return 0;
2524 } else if (dst_first_rc == rc_kreg) {
2525 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2526 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2527 // 64-bit
2528 if (masm) {
2529 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2530 #ifndef PRODUCT
2531 } else {
2532 st->print("kmovq %s, %s\t# spill",
2533 Matcher::regName[dst_first],
2534 Matcher::regName[src_first]);
2535 #endif
2536 }
2537 }
2538 return 0;
2539 } else if (dst_first_rc == rc_float) {
2540 assert(false, "Illegal spill");
2541 return 0;
2542 }
2543 }
2544
  assert(false, "unhandled register class combination in MachSpillCopyNode");
2546 Unimplemented();
2547 return 0;
2548 }
2549
2550 #ifndef PRODUCT
2551 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2552 implementation(nullptr, ra_, false, st);
2553 }
2554 #endif
2555
2556 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2557 implementation(masm, ra_, false, nullptr);
2558 }
2559
2560 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2561 return MachNode::size(ra_);
2562 }
2563
2564 //=============================================================================
2565 #ifndef PRODUCT
2566 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2567 {
2568 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2569 int reg = ra_->get_reg_first(this);
2570 st->print("leaq %s, [rsp + #%d]\t# box lock",
2571 Matcher::regName[reg], offset);
2572 }
2573 #endif
2574
2575 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2576 {
2577 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2578 int reg = ra_->get_encode(this);
2579
2580 __ lea(as_Register(reg), Address(rsp, offset));
2581 }
2582
2583 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2584 {
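  // lea reg, [rsp + offset] is REX + opcode + ModRM + SIB + disp8/disp32
  // (5 or 8 bytes); a REX2 prefix adds one more byte.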
2585 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2586 if (ra_->get_encode(this) > 15) {
2587 return (offset < 0x80) ? 6 : 9; // REX2
2588 } else {
2589 return (offset < 0x80) ? 5 : 8; // REX
2590 }
2591 }
2592
2593 //=============================================================================
2594 #ifndef PRODUCT
2595 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2596 {
2597 st->print_cr("MachVEPNode");
2598 }
2599 #endif
2600
2601 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2602 {
2603 CodeBuffer* cbuf = masm->code();
2604 uint insts_size = cbuf->insts_size();
2605 if (!_verified) {
2606 __ ic_check(1);
2607 } else {
2608 // TODO 8284443 Avoid creation of temporary frame
2609 if (ra_->C->stub_function() == nullptr) {
2610 __ verified_entry(ra_->C, 0);
2611 __ entry_barrier();
2612 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2613 __ remove_frame(initial_framesize, false);
2614 }
2615 // Unpack inline type args passed as oop and then jump to
2616 // the verified entry point (skipping the unverified entry).
2617 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2618 // Emit code for verified entry and save increment for stack repair on return
2619 __ verified_entry(ra_->C, sp_inc);
2620 if (Compile::current()->output()->in_scratch_emit_size()) {
2621 Label dummy_verified_entry;
2622 __ jmp(dummy_verified_entry);
2623 } else {
2624 __ jmp(*_verified_entry);
2625 }
2626 }
2627 /* WARNING these NOPs are critical so that verified entry point is properly
2628 4 bytes aligned for patching by NativeJump::patch_verified_entry() */
2629 int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
2630 nops_cnt &= 0x3; // Do not add nops if code is aligned.
2631 if (nops_cnt > 0) {
2632 __ nop(nops_cnt);
2633 }
2634 }
2635
2636 //=============================================================================
2637 #ifndef PRODUCT
2638 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2639 {
2640 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2641 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2642 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2643 }
2644 #endif
2645
2646 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2647 {
2648 __ ic_check(InteriorEntryAlignment);
2649 }
2650
2651
2652 //=============================================================================
2653
2654 bool Matcher::supports_vector_calling_convention(void) {
2655 return EnableVectorSupport;
2656 }
2657
2658 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2659 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2660 }
2661
2662 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2663 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2664 }
2665
2666 #ifdef ASSERT
2667 static bool is_ndd_demotable(const MachNode* mdef) {
2668 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2669 }
2670 #endif
2671
2672 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2673 int oper_index) {
2674 if (mdef == nullptr) {
2675 return false;
2676 }
2677
2678 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2679 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2680 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2681 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2682 return false;
2683 }
2684
2685 // A complex memory operand covers multiple incoming edges needed for
2686 // address computation. Biasing the def towards any address component will not
2687 // result in NDD demotion by the assembler.
2688 if (mdef->operand_num_edges(oper_index) != 1) {
2689 return false;
2690 }
2691
2692 // Demotion candidate must be register mask compatible with definition.
2693 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2694 if (!oper_mask.overlap(mdef->out_RegMask())) {
2695 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2696 return false;
2697 }
2698
2699 switch (oper_index) {
2700 // The first operand of a MachNode matched by an Intel APX NDD selection
2701 // pattern can share its assigned register with the definition operand if
2702 // their live ranges do not overlap. In such a scenario we can demote the
2703 // instruction to a legacy map0/map1 encoding by replacing its 4-byte extended
2704 // EVEX prefix with a shorter REX/REX2 encoding. Demotion candidates
2705 // are decorated with a special flag by the instruction selector.
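// Illustrative example: an NDD 'add rbx, rbx, rcx' (destination equal to the first source) is
// equivalent to the legacy two-operand 'add rbx, rcx', which needs only a REX/REX2 prefix.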
2706 case 1:
2707 return is_ndd_demotable_opr1(mdef);
2708
2709 // The definition operand of a commutative operation can also be biased towards
2710 // the second operand.
2711 case 2:
2712 return is_ndd_demotable_opr2(mdef);
2713
2714 // The current scheme only selects up to two biasing candidates.
2715 default:
2716 assert(false, "unhandled operand index: %s", mdef->Name());
2717 break;
2718 }
2719
2720 return false;
2721 }
2722
2723 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2724 assert(EnableVectorSupport, "sanity");
2725 int lo = XMM0_num;
2726 int hi = XMM0b_num;
2727 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2728 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2729 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2730 return OptoRegPair(hi, lo);
2731 }
2732
2733 // Is this branch offset short enough that a short branch can be used?
2734 //
2735 // NOTE: If the platform does not provide any short branch variants, then
2736 // this method should return false for offset 0.
2737 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2738 // The passed offset is relative to the address of the branch.
2739 // On x86 a branch displacement is calculated relative to the address
2740 // of the next instruction.
2741 offset -= br_size;
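// Illustrative: for a 2-byte short branch (br_size == 2), an incoming offset of 129 becomes 127
// after this adjustment and still fits in a signed 8-bit displacement.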
2742
2743 // The short version of jmpConUCF2 contains multiple branches,
2744 // making the reach slightly shorter.
2745 if (rule == jmpConUCF2_rule)
2746 return (-126 <= offset && offset <= 125);
2747 return (-128 <= offset && offset <= 127);
2748 }
2749
2750 #ifdef ASSERT
2751 // Return whether or not this register is ever used as an argument.
2752 bool Matcher::can_be_java_arg(int reg)
2753 {
2754 return
2755 reg == RDI_num || reg == RDI_H_num ||
2756 reg == RSI_num || reg == RSI_H_num ||
2757 reg == RDX_num || reg == RDX_H_num ||
2758 reg == RCX_num || reg == RCX_H_num ||
2759 reg == R8_num || reg == R8_H_num ||
2760 reg == R9_num || reg == R9_H_num ||
2761 reg == R12_num || reg == R12_H_num ||
2762 reg == XMM0_num || reg == XMM0b_num ||
2763 reg == XMM1_num || reg == XMM1b_num ||
2764 reg == XMM2_num || reg == XMM2b_num ||
2765 reg == XMM3_num || reg == XMM3b_num ||
2766 reg == XMM4_num || reg == XMM4b_num ||
2767 reg == XMM5_num || reg == XMM5b_num ||
2768 reg == XMM6_num || reg == XMM6b_num ||
2769 reg == XMM7_num || reg == XMM7b_num;
2770 }
2771 #endif
2772
2773 uint Matcher::int_pressure_limit()
2774 {
2775 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2776 }
2777
2778 uint Matcher::float_pressure_limit()
2779 {
2780 // After experimenting with different values, the following default threshold
2781 // was found to work best for LCM's register pressure scheduling on x64.
2782 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2783 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2784 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2785 }
2786
2787 // Register for DIVI projection of divmodI
2788 const RegMask& Matcher::divI_proj_mask() {
2789 return INT_RAX_REG_mask();
2790 }
2791
2792 // Register for MODI projection of divmodI
2793 const RegMask& Matcher::modI_proj_mask() {
2794 return INT_RDX_REG_mask();
2795 }
2796
2797 // Register for DIVL projection of divmodL
2798 const RegMask& Matcher::divL_proj_mask() {
2799 return LONG_RAX_REG_mask();
2800 }
2801
2802 // Register for MODL projection of divmodL
2803 const RegMask& Matcher::modL_proj_mask() {
2804 return LONG_RDX_REG_mask();
2805 }
2806
2807 %}
2808
2809 source_hpp %{
2810 // Header information of the source block.
2811 // Method declarations/definitions which are used outside
2812 // the ad-scope can conveniently be defined here.
2813 //
2814 // To keep related declarations/definitions/uses close together,
2815 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2816
2817 #include "runtime/vm_version.hpp"
2818
2819 class NativeJump;
2820
2821 class CallStubImpl {
2822
2823 //--------------------------------------------------------------
2824 //---< Used for optimization in Compile::shorten_branches >---
2825 //--------------------------------------------------------------
2826
2827 public:
2828 // Size of call trampoline stub.
2829 static uint size_call_trampoline() {
2830 return 0; // no call trampolines on this platform
2831 }
2832
2833 // number of relocations needed by a call trampoline stub
2834 static uint reloc_call_trampoline() {
2835 return 0; // no call trampolines on this platform
2836 }
2837 };
2838
2839 class HandlerImpl {
2840
2841 public:
2842
2843 static int emit_deopt_handler(C2_MacroAssembler* masm);
2844
2845 static uint size_deopt_handler() {
2846 // one call and one jmp.
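// (Illustrative breakdown: the call below is a 5-byte call rel32 and the backward jmp to
// 'start' fits in a 2-byte short jmp, giving 7 bytes in total.)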
2847 return 7;
2848 }
2849 };
2850
2851 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2852 switch(bytes) {
2853 case 4: // fall-through
2854 case 8: // fall-through
2855 case 16: return Assembler::AVX_128bit;
2856 case 32: return Assembler::AVX_256bit;
2857 case 64: return Assembler::AVX_512bit;
2858
2859 default: {
2860 ShouldNotReachHere();
2861 return Assembler::AVX_NoVec;
2862 }
2863 }
2864 }
2865
2866 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2867 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2868 }
2869
2870 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2871 uint def_idx = use->operand_index(opnd);
2872 Node* def = use->in(def_idx);
2873 return vector_length_encoding(def);
2874 }
2875
2876 static inline bool is_vector_popcount_predicate(BasicType bt) {
2877 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2878 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2879 }
2880
2881 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2882 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2883 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2884 }
2885
2886 class Node::PD {
2887 public:
2888 enum NodeFlags : uint64_t {
2889 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2890 Flag_sets_carry_flag = Node::_last_flag << 2,
2891 Flag_sets_parity_flag = Node::_last_flag << 3,
2892 Flag_sets_zero_flag = Node::_last_flag << 4,
2893 Flag_sets_overflow_flag = Node::_last_flag << 5,
2894 Flag_sets_sign_flag = Node::_last_flag << 6,
2895 Flag_clears_carry_flag = Node::_last_flag << 7,
2896 Flag_clears_parity_flag = Node::_last_flag << 8,
2897 Flag_clears_zero_flag = Node::_last_flag << 9,
2898 Flag_clears_overflow_flag = Node::_last_flag << 10,
2899 Flag_clears_sign_flag = Node::_last_flag << 11,
2900 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2901 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2902 _last_flag = Flag_ndd_demotable_opr2
2903 };
2904 };
2905
2906 %} // end source_hpp
2907
2908 source %{
2909
2910 #include "opto/addnode.hpp"
2911 #include "c2_intelJccErratum_x86.hpp"
2912
2913 void PhaseOutput::pd_perform_mach_node_analysis() {
2914 if (VM_Version::has_intel_jcc_erratum()) {
2915 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2916 _buf_sizes._code += extra_padding;
2917 }
2918 }
2919
2920 int MachNode::pd_alignment_required() const {
2921 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2922 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
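// (The erratum affects jump and macro-fused jump instructions that cross or end on a 32-byte
// boundary; see c2_intelJccErratum_x86 for the tagging and padding logic.)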
2923 return IntelJccErratum::largest_jcc_size() + 1;
2924 } else {
2925 return 1;
2926 }
2927 }
2928
2929 int MachNode::compute_padding(int current_offset) const {
2930 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2931 Compile* C = Compile::current();
2932 PhaseOutput* output = C->output();
2933 Block* block = output->block();
2934 int index = output->index();
2935 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2936 } else {
2937 return 0;
2938 }
2939 }
2940
2941 // Emit deopt handler code.
2942 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2943
2944 // Note that the code buffer's insts_mark is always relative to insts.
2945 // That's why we must use the macroassembler to generate a handler.
2946 address base = __ start_a_stub(size_deopt_handler());
2947 if (base == nullptr) {
2948 ciEnv::current()->record_failure("CodeCache is full");
2949 return 0; // CodeBuffer::expand failed
2950 }
2951 int offset = __ offset();
2952
2953 Label start;
2954 __ bind(start);
2955
2956 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2957
2958 int entry_offset = __ offset();
2959
2960 __ jmp(start);
2961
2962 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2963 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2964 "out of bounds read in post-call NOP check");
2965 __ end_a_stub();
2966 return entry_offset;
2967 }
2968
2969 static Assembler::Width widthForType(BasicType bt) {
2970 if (bt == T_BYTE) {
2971 return Assembler::B;
2972 } else if (bt == T_SHORT) {
2973 return Assembler::W;
2974 } else if (bt == T_INT) {
2975 return Assembler::D;
2976 } else {
2977 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2978 return Assembler::Q;
2979 }
2980 }
2981
2982 //=============================================================================
2983
2984 // Float masks come from different places depending on platform.
2985 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2986 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2987 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2988 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2989 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2990 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2991 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2992 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2993 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2994 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2995 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2996 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2997 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2998 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2999 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
3000 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
3001 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
3002 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
3003 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
3004
3005 //=============================================================================
3006 bool Matcher::match_rule_supported(int opcode) {
3007 if (!has_match_rule(opcode)) {
3008 return false; // no match rule present
3009 }
3010 switch (opcode) {
3011 case Op_AbsVL:
3012 case Op_StoreVectorScatter:
3013 if (UseAVX < 3) {
3014 return false;
3015 }
3016 break;
3017 case Op_PopCountI:
3018 case Op_PopCountL:
3019 if (!UsePopCountInstruction) {
3020 return false;
3021 }
3022 break;
3023 case Op_PopCountVI:
3024 if (UseAVX < 2) {
3025 return false;
3026 }
3027 break;
3028 case Op_CompressV:
3029 case Op_ExpandV:
3030 case Op_PopCountVL:
3031 if (UseAVX < 2) {
3032 return false;
3033 }
3034 break;
3035 case Op_MulVI:
3036 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3037 return false;
3038 }
3039 break;
3040 case Op_MulVL:
3041 if (UseSSE < 4) { // only with SSE4_1 or AVX
3042 return false;
3043 }
3044 break;
3045 case Op_MulReductionVL:
3046 if (VM_Version::supports_avx512dq() == false) {
3047 return false;
3048 }
3049 break;
3050 case Op_AbsVB:
3051 case Op_AbsVS:
3052 case Op_AbsVI:
3053 case Op_AddReductionVI:
3054 case Op_AndReductionV:
3055 case Op_OrReductionV:
3056 case Op_XorReductionV:
3057 if (UseSSE < 3) { // requires at least SSSE3
3058 return false;
3059 }
3060 break;
3061 case Op_MaxHF:
3062 case Op_MinHF:
3063 if (!VM_Version::supports_avx512vlbw()) {
3064 return false;
3065 } // fallthrough
3066 case Op_AddHF:
3067 case Op_DivHF:
3068 case Op_FmaHF:
3069 case Op_MulHF:
3070 case Op_ReinterpretS2HF:
3071 case Op_ReinterpretHF2S:
3072 case Op_SubHF:
3073 case Op_SqrtHF:
3074 if (!VM_Version::supports_avx512_fp16()) {
3075 return false;
3076 }
3077 break;
3078 case Op_VectorLoadShuffle:
3079 case Op_VectorRearrange:
3080 case Op_MulReductionVI:
3081 if (UseSSE < 4) { // requires at least SSE4
3082 return false;
3083 }
3084 break;
3085 case Op_IsInfiniteF:
3086 case Op_IsInfiniteD:
3087 if (!VM_Version::supports_avx512dq()) {
3088 return false;
3089 }
3090 break;
3091 case Op_SqrtVD:
3092 case Op_SqrtVF:
3093 case Op_VectorMaskCmp:
3094 case Op_VectorCastB2X:
3095 case Op_VectorCastS2X:
3096 case Op_VectorCastI2X:
3097 case Op_VectorCastL2X:
3098 case Op_VectorCastF2X:
3099 case Op_VectorCastD2X:
3100 case Op_VectorUCastB2X:
3101 case Op_VectorUCastS2X:
3102 case Op_VectorUCastI2X:
3103 case Op_VectorMaskCast:
3104 if (UseAVX < 1) { // enabled for AVX only
3105 return false;
3106 }
3107 break;
3108 case Op_PopulateIndex:
3109 if (UseAVX < 2) {
3110 return false;
3111 }
3112 break;
3113 case Op_RoundVF:
3114 if (UseAVX < 2) { // enabled for AVX2 only
3115 return false;
3116 }
3117 break;
3118 case Op_RoundVD:
3119 if (UseAVX < 3) {
3120 return false; // enabled for AVX3 only
3121 }
3122 break;
3123 case Op_CompareAndSwapL:
3124 case Op_CompareAndSwapP:
3125 break;
3126 case Op_StrIndexOf:
3127 if (!UseSSE42Intrinsics) {
3128 return false;
3129 }
3130 break;
3131 case Op_StrIndexOfChar:
3132 if (!UseSSE42Intrinsics) {
3133 return false;
3134 }
3135 break;
3136 case Op_OnSpinWait:
3137 if (VM_Version::supports_on_spin_wait() == false) {
3138 return false;
3139 }
3140 break;
3141 case Op_MulVB:
3142 case Op_LShiftVB:
3143 case Op_RShiftVB:
3144 case Op_URShiftVB:
3145 case Op_VectorInsert:
3146 case Op_VectorLoadMask:
3147 case Op_VectorStoreMask:
3148 case Op_VectorBlend:
3149 if (UseSSE < 4) {
3150 return false;
3151 }
3152 break;
3153 case Op_MaxD:
3154 case Op_MaxF:
3155 case Op_MinD:
3156 case Op_MinF:
3157 if (UseAVX < 1) { // enabled for AVX only
3158 return false;
3159 }
3160 break;
3161 case Op_CacheWB:
3162 case Op_CacheWBPreSync:
3163 case Op_CacheWBPostSync:
3164 if (!VM_Version::supports_data_cache_line_flush()) {
3165 return false;
3166 }
3167 break;
3168 case Op_ExtractB:
3169 case Op_ExtractL:
3170 case Op_ExtractI:
3171 case Op_RoundDoubleMode:
3172 if (UseSSE < 4) {
3173 return false;
3174 }
3175 break;
3176 case Op_RoundDoubleModeV:
3177 if (VM_Version::supports_avx() == false) {
3178 return false; // 128bit vroundpd is not available
3179 }
3180 break;
3181 case Op_LoadVectorGather:
3182 case Op_LoadVectorGatherMasked:
3183 if (UseAVX < 2) {
3184 return false;
3185 }
3186 break;
3187 case Op_FmaF:
3188 case Op_FmaD:
3189 case Op_FmaVD:
3190 case Op_FmaVF:
3191 if (!UseFMA) {
3192 return false;
3193 }
3194 break;
3195 case Op_MacroLogicV:
3196 if (UseAVX < 3 || !UseVectorMacroLogic) {
3197 return false;
3198 }
3199 break;
3200
3201 case Op_VectorCmpMasked:
3202 case Op_VectorMaskGen:
3203 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3204 return false;
3205 }
3206 break;
3207 case Op_VectorMaskFirstTrue:
3208 case Op_VectorMaskLastTrue:
3209 case Op_VectorMaskTrueCount:
3210 case Op_VectorMaskToLong:
3211 if (UseAVX < 1) {
3212 return false;
3213 }
3214 break;
3215 case Op_RoundF:
3216 case Op_RoundD:
3217 break;
3218 case Op_CopySignD:
3219 case Op_CopySignF:
3220 if (UseAVX < 3) {
3221 return false;
3222 }
3223 if (!VM_Version::supports_avx512vl()) {
3224 return false;
3225 }
3226 break;
3227 case Op_CompressBits:
3228 case Op_ExpandBits:
3229 if (!VM_Version::supports_bmi2()) {
3230 return false;
3231 }
3232 break;
3233 case Op_CompressM:
3234 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3235 return false;
3236 }
3237 break;
3238 case Op_ConvF2HF:
3239 case Op_ConvHF2F:
3240 if (!VM_Version::supports_float16()) {
3241 return false;
3242 }
3243 break;
3244 case Op_VectorCastF2HF:
3245 case Op_VectorCastHF2F:
3246 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3247 return false;
3248 }
3249 break;
3250 }
3251 return true; // Match rules are supported by default.
3252 }
3253
3254 //------------------------------------------------------------------------
3255
3256 static inline bool is_pop_count_instr_target(BasicType bt) {
3257 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3258 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3259 }
3260
3261 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3262 return match_rule_supported_vector(opcode, vlen, bt);
3263 }
3264
3265 // Identify extra cases in which we want to provide match rules for vector nodes and
3266 // other intrinsics, guarded by vector length (vlen) and element type (bt).
3267 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3268 if (!match_rule_supported(opcode)) {
3269 return false;
3270 }
3271 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3272 // * SSE2 supports 128bit vectors for all types;
3273 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3274 // * AVX2 supports 256bit vectors for all types;
3275 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3276 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3277 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3278 // And MaxVectorSize is taken into account as well.
3279 if (!vector_size_supported(bt, vlen)) {
3280 return false;
3281 }
3282 // Special cases which require vector length follow:
3283 // * implementation limitations
3284 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3285 // * 128bit vroundpd instruction is present only in AVX1
3286 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
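// Illustrative: for vlen == 8 and bt == T_INT this is 8 * 4 * 8 = 256 bits.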
3287 switch (opcode) {
3288 case Op_MaxVHF:
3289 case Op_MinVHF:
3290 if (!VM_Version::supports_avx512bw()) {
3291 return false;
3292 } // fallthrough
3293 case Op_AddVHF:
3294 case Op_DivVHF:
3295 case Op_FmaVHF:
3296 case Op_MulVHF:
3297 case Op_SubVHF:
3298 case Op_SqrtVHF:
3299 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3300 return false;
3301 }
3302 if (!VM_Version::supports_avx512_fp16()) {
3303 return false;
3304 }
3305 break;
3306 case Op_AbsVF:
3307 case Op_NegVF:
3308 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3309 return false; // 512bit vandps and vxorps are not available
3310 }
3311 break;
3312 case Op_AbsVD:
3313 case Op_NegVD:
3314 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3315 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3316 }
3317 break;
3318 case Op_RotateRightV:
3319 case Op_RotateLeftV:
3320 if (bt != T_INT && bt != T_LONG) {
3321 return false;
3322 } // fallthrough
3323 case Op_MacroLogicV:
3324 if (!VM_Version::supports_evex() ||
3325 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3326 return false;
3327 }
3328 break;
3329 case Op_ClearArray:
3330 case Op_VectorMaskGen:
3331 case Op_VectorCmpMasked:
3332 if (!VM_Version::supports_avx512bw()) {
3333 return false;
3334 }
3335 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3336 return false;
3337 }
3338 break;
3339 case Op_LoadVectorMasked:
3340 case Op_StoreVectorMasked:
3341 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3342 return false;
3343 }
3344 break;
3345 case Op_UMinV:
3346 case Op_UMaxV:
3347 if (UseAVX == 0) {
3348 return false;
3349 }
3350 break;
3351 case Op_UMinReductionV:
3352 case Op_UMaxReductionV:
3353 if (UseAVX == 0) {
3354 return false;
3355 }
3356 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3357 return false;
3358 }
3359 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3360 return false;
3361 }
3362 break;
3363 case Op_MaxV:
3364 case Op_MinV:
3365 if (UseSSE < 4 && is_integral_type(bt)) {
3366 return false;
3367 }
3368 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3369 // Float/Double intrinsics are enabled for AVX family currently.
3370 if (UseAVX == 0) {
3371 return false;
3372 }
3373 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3374 return false;
3375 }
3376 }
3377 break;
3378 case Op_CallLeafVector:
3379 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3380 return false;
3381 }
3382 break;
3383 case Op_AddReductionVI:
3384 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3385 return false;
3386 }
3387 // fallthrough
3388 case Op_AndReductionV:
3389 case Op_OrReductionV:
3390 case Op_XorReductionV:
3391 if (is_subword_type(bt) && (UseSSE < 4)) {
3392 return false;
3393 }
3394 break;
3395 case Op_MinReductionV:
3396 case Op_MaxReductionV:
3397 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3398 return false;
3399 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3400 return false;
3401 }
3402 // Float/Double intrinsics enabled for AVX family.
3403 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3404 return false;
3405 }
3406 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3407 return false;
3408 }
3409 break;
3410 case Op_VectorBlend:
3411 if (UseAVX == 0 && size_in_bits < 128) {
3412 return false;
3413 }
3414 break;
3415 case Op_VectorTest:
3416 if (UseSSE < 4) {
3417 return false; // Implementation limitation
3418 } else if (size_in_bits < 32) {
3419 return false; // Implementation limitation
3420 }
3421 break;
3422 case Op_VectorLoadShuffle:
3423 case Op_VectorRearrange:
3424 if (vlen == 2) {
3425 return false; // Implementation limitation due to how shuffle is loaded
3426 } else if (size_in_bits == 256 && UseAVX < 2) {
3427 return false; // Implementation limitation
3428 }
3429 break;
3430 case Op_VectorLoadMask:
3431 case Op_VectorMaskCast:
3432 if (size_in_bits == 256 && UseAVX < 2) {
3433 return false; // Implementation limitation
3434 }
3435 // fallthrough
3436 case Op_VectorStoreMask:
3437 if (vlen == 2) {
3438 return false; // Implementation limitation
3439 }
3440 break;
3441 case Op_PopulateIndex:
3442 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3443 return false;
3444 }
3445 break;
3446 case Op_VectorCastB2X:
3447 case Op_VectorCastS2X:
3448 case Op_VectorCastI2X:
3449 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3450 return false;
3451 }
3452 break;
3453 case Op_VectorCastL2X:
3454 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3455 return false;
3456 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3457 return false;
3458 }
3459 break;
3460 case Op_VectorCastF2X: {
3461 // As per JLS section 5.1.3, narrowing conversions to sub-word types
3462 // happen after an intermediate conversion to integer, and the special handling
3463 // code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3464 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3465 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3466 return false;
3467 }
3468 }
3469 // fallthrough
3470 case Op_VectorCastD2X:
3471 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3472 return false;
3473 }
3474 break;
3475 case Op_VectorCastF2HF:
3476 case Op_VectorCastHF2F:
3477 if (!VM_Version::supports_f16c() &&
3478 ((!VM_Version::supports_evex() ||
3479 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3480 return false;
3481 }
3482 break;
3483 case Op_RoundVD:
3484 if (!VM_Version::supports_avx512dq()) {
3485 return false;
3486 }
3487 break;
3488 case Op_MulReductionVI:
3489 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3490 return false;
3491 }
3492 break;
3493 case Op_LoadVectorGatherMasked:
3494 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3495 return false;
3496 }
3497 if (is_subword_type(bt) &&
3498 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3499 (size_in_bits < 64) ||
3500 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3501 return false;
3502 }
3503 break;
3504 case Op_StoreVectorScatterMasked:
3505 case Op_StoreVectorScatter:
3506 if (is_subword_type(bt)) {
3507 return false;
3508 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3509 return false;
3510 }
3511 // fallthrough
3512 case Op_LoadVectorGather:
3513 if (!is_subword_type(bt) && size_in_bits == 64) {
3514 return false;
3515 }
3516 if (is_subword_type(bt) && size_in_bits < 64) {
3517 return false;
3518 }
3519 break;
3520 case Op_SaturatingAddV:
3521 case Op_SaturatingSubV:
3522 if (UseAVX < 1) {
3523 return false; // Implementation limitation
3524 }
3525 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3526 return false;
3527 }
3528 break;
3529 case Op_SelectFromTwoVector:
3530 if (size_in_bits < 128) {
3531 return false;
3532 }
3533 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3534 return false;
3535 }
3536 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3537 return false;
3538 }
3539 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3540 return false;
3541 }
3542 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3543 return false;
3544 }
3545 break;
3546 case Op_MaskAll:
3547 if (!VM_Version::supports_evex()) {
3548 return false;
3549 }
3550 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3551 return false;
3552 }
3553 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3554 return false;
3555 }
3556 break;
3557 case Op_VectorMaskCmp:
3558 if (vlen < 2 || size_in_bits < 32) {
3559 return false;
3560 }
3561 break;
3562 case Op_CompressM:
3563 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3564 return false;
3565 }
3566 break;
3567 case Op_CompressV:
3568 case Op_ExpandV:
3569 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3570 return false;
3571 }
3572 if (size_in_bits < 128) {
3573 return false;
3574 } // fallthrough
3575 case Op_VectorLongToMask:
3576 if (UseAVX < 1) {
3577 return false;
3578 }
3579 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3580 return false;
3581 }
3582 break;
3583 case Op_SignumVD:
3584 case Op_SignumVF:
3585 if (UseAVX < 1) {
3586 return false;
3587 }
3588 break;
3589 case Op_PopCountVI:
3590 case Op_PopCountVL: {
3591 if (!is_pop_count_instr_target(bt) &&
3592 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3593 return false;
3594 }
3595 }
3596 break;
3597 case Op_ReverseV:
3598 case Op_ReverseBytesV:
3599 if (UseAVX < 2) {
3600 return false;
3601 }
3602 break;
3603 case Op_CountTrailingZerosV:
3604 case Op_CountLeadingZerosV:
3605 if (UseAVX < 2) {
3606 return false;
3607 }
3608 break;
3609 }
3610 return true; // Match rules are supported by default.
3611 }
3612
3613 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3614 // The ADLC-based match_rule_supported routine checks for the existence of a pattern based
3615 // on the IR opcode. Most unary/binary/ternary masked operations share the IR nodes
3616 // of their non-masked counterparts, with the mask edge being the differentiator.
3617 // This routine does a strict check on the existence of masked operation patterns
3618 // by returning false for all opcodes other than the
3619 // ones whose masked instruction patterns are defined in this file.
3620 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3621 return false;
3622 }
3623
3624 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3625 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3626 return false;
3627 }
3628 switch(opcode) {
3629 // Unary masked operations
3630 case Op_AbsVB:
3631 case Op_AbsVS:
3632 if (!VM_Version::supports_avx512bw()) {
3633 return false; // Implementation limitation
3634 } // fallthrough
3635 case Op_AbsVI:
3636 case Op_AbsVL:
3637 return true;
3638
3639 // Ternary masked operations
3640 case Op_FmaVF:
3641 case Op_FmaVD:
3642 return true;
3643
3644 case Op_MacroLogicV:
3645 if (bt != T_INT && bt != T_LONG) {
3646 return false;
3647 }
3648 return true;
3649
3650 // Binary masked operations
3651 case Op_AddVB:
3652 case Op_AddVS:
3653 case Op_SubVB:
3654 case Op_SubVS:
3655 case Op_MulVS:
3656 case Op_LShiftVS:
3657 case Op_RShiftVS:
3658 case Op_URShiftVS:
3659 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3660 if (!VM_Version::supports_avx512bw()) {
3661 return false; // Implementation limitation
3662 }
3663 return true;
3664
3665 case Op_MulVL:
3666 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3667 if (!VM_Version::supports_avx512dq()) {
3668 return false; // Implementation limitation
3669 }
3670 return true;
3671
3672 case Op_AndV:
3673 case Op_OrV:
3674 case Op_XorV:
3675 case Op_RotateRightV:
3676 case Op_RotateLeftV:
3677 if (bt != T_INT && bt != T_LONG) {
3678 return false; // Implementation limitation
3679 }
3680 return true;
3681
3682 case Op_VectorLoadMask:
3683 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3684 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3685 return false;
3686 }
3687 return true;
3688
3689 case Op_AddVI:
3690 case Op_AddVL:
3691 case Op_AddVF:
3692 case Op_AddVD:
3693 case Op_SubVI:
3694 case Op_SubVL:
3695 case Op_SubVF:
3696 case Op_SubVD:
3697 case Op_MulVI:
3698 case Op_MulVF:
3699 case Op_MulVD:
3700 case Op_DivVF:
3701 case Op_DivVD:
3702 case Op_SqrtVF:
3703 case Op_SqrtVD:
3704 case Op_LShiftVI:
3705 case Op_LShiftVL:
3706 case Op_RShiftVI:
3707 case Op_RShiftVL:
3708 case Op_URShiftVI:
3709 case Op_URShiftVL:
3710 case Op_LoadVectorMasked:
3711 case Op_StoreVectorMasked:
3712 case Op_LoadVectorGatherMasked:
3713 case Op_StoreVectorScatterMasked:
3714 return true;
3715
3716 case Op_UMinV:
3717 case Op_UMaxV:
3718 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3719 return false;
3720 } // fallthrough
3721 case Op_MaxV:
3722 case Op_MinV:
3723 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3724 return false; // Implementation limitation
3725 }
3726 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3727 return false; // Implementation limitation
3728 }
3729 return true;
3730 case Op_SaturatingAddV:
3731 case Op_SaturatingSubV:
3732 if (!is_subword_type(bt)) {
3733 return false;
3734 }
3735 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3736 return false; // Implementation limitation
3737 }
3738 return true;
3739
3740 case Op_VectorMaskCmp:
3741 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3742 return false; // Implementation limitation
3743 }
3744 return true;
3745
3746 case Op_VectorRearrange:
3747 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3748 return false; // Implementation limitation
3749 }
3750 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3751 return false; // Implementation limitation
3752 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3753 return false; // Implementation limitation
3754 }
3755 return true;
3756
3757 // Binary Logical operations
3758 case Op_AndVMask:
3759 case Op_OrVMask:
3760 case Op_XorVMask:
3761 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3762 return false; // Implementation limitation
3763 }
3764 return true;
3765
3766 case Op_PopCountVI:
3767 case Op_PopCountVL:
3768 if (!is_pop_count_instr_target(bt)) {
3769 return false;
3770 }
3771 return true;
3772
3773 case Op_MaskAll:
3774 return true;
3775
3776 case Op_CountLeadingZerosV:
3777 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3778 return true;
3779 } // fallthrough
3780 default:
3781 return false;
3782 }
3783 }
3784
3785 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3786 return false;
3787 }
3788
3789 // Return true if Vector::rearrange needs preparation of the shuffle argument
3790 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3791 switch (elem_bt) {
3792 case T_BYTE: return false;
3793 case T_SHORT: return !VM_Version::supports_avx512bw();
3794 case T_INT: return !VM_Version::supports_avx();
3795 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3796 default:
3797 ShouldNotReachHere();
3798 return false;
3799 }
3800 }
3801
3802 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3803 // Prefer predicate if the mask type is "TypeVectMask".
3804 return vt->isa_vectmask() != nullptr;
3805 }
3806
3807 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3808 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3809 bool legacy = (generic_opnd->opcode() == LEGVEC);
3810 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3811 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3812 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3813 return new legVecZOper();
3814 }
3815 if (legacy) {
3816 switch (ideal_reg) {
3817 case Op_VecS: return new legVecSOper();
3818 case Op_VecD: return new legVecDOper();
3819 case Op_VecX: return new legVecXOper();
3820 case Op_VecY: return new legVecYOper();
3821 case Op_VecZ: return new legVecZOper();
3822 }
3823 } else {
3824 switch (ideal_reg) {
3825 case Op_VecS: return new vecSOper();
3826 case Op_VecD: return new vecDOper();
3827 case Op_VecX: return new vecXOper();
3828 case Op_VecY: return new vecYOper();
3829 case Op_VecZ: return new vecZOper();
3830 }
3831 }
3832 ShouldNotReachHere();
3833 return nullptr;
3834 }
3835
3836 bool Matcher::is_reg2reg_move(MachNode* m) {
3837 switch (m->rule()) {
3838 case MoveVec2Leg_rule:
3839 case MoveLeg2Vec_rule:
3840 case MoveF2VL_rule:
3841 case MoveF2LEG_rule:
3842 case MoveVL2F_rule:
3843 case MoveLEG2F_rule:
3844 case MoveD2VL_rule:
3845 case MoveD2LEG_rule:
3846 case MoveVL2D_rule:
3847 case MoveLEG2D_rule:
3848 return true;
3849 default:
3850 return false;
3851 }
3852 }
3853
3854 bool Matcher::is_generic_vector(MachOper* opnd) {
3855 switch (opnd->opcode()) {
3856 case VEC:
3857 case LEGVEC:
3858 return true;
3859 default:
3860 return false;
3861 }
3862 }
3863
3864 //------------------------------------------------------------------------
3865
3866 const RegMask* Matcher::predicate_reg_mask(void) {
3867 return &_VECTMASK_REG_mask;
3868 }
3869
3870 // Max vector size in bytes. 0 if not supported.
3871 int Matcher::vector_width_in_bytes(BasicType bt) {
3872 assert(is_java_primitive(bt), "only primitive type vectors");
3873 // SSE2 supports 128bit vectors for all types.
3874 // AVX2 supports 256bit vectors for all types.
3875 // AVX512/EVEX supports 512bit vectors for all types.
3876 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
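// Illustrative: UseAVX == 2 gives (1 << 2) * 8 = 32 bytes (256 bits), UseAVX == 3 gives 64 bytes.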
3877 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3878 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3879 size = (UseAVX > 2) ? 64 : 32;
3880 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3881 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3882 // Use flag to limit vector size.
3883 size = MIN2(size,(int)MaxVectorSize);
3884 // Minimum 2 values in vector (or 4 for bytes).
3885 switch (bt) {
3886 case T_DOUBLE:
3887 case T_LONG:
3888 if (size < 16) return 0;
3889 break;
3890 case T_FLOAT:
3891 case T_INT:
3892 if (size < 8) return 0;
3893 break;
3894 case T_BOOLEAN:
3895 if (size < 4) return 0;
3896 break;
3897 case T_CHAR:
3898 if (size < 4) return 0;
3899 break;
3900 case T_BYTE:
3901 if (size < 4) return 0;
3902 break;
3903 case T_SHORT:
3904 if (size < 4) return 0;
3905 break;
3906 default:
3907 ShouldNotReachHere();
3908 }
3909 return size;
3910 }
3911
3912 // Limits on vector size (number of elements) loaded into vector.
3913 int Matcher::max_vector_size(const BasicType bt) {
3914 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3915 }
3916 int Matcher::min_vector_size(const BasicType bt) {
3917 int max_size = max_vector_size(bt);
3918 // Min size which can be loaded into vector is 4 bytes.
3919 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
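// (So T_BYTE vectors need at least 4 elements while wider element types need at least 2;
// doubles are further relaxed below.)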
3920 // Support for calling svml double64 vectors
3921 if (bt == T_DOUBLE) {
3922 size = 1;
3923 }
3924 return MIN2(size,max_size);
3925 }
3926
3927 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3928 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3929 // by default on Cascade Lake
3930 if (VM_Version::is_default_intel_cascade_lake()) {
3931 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3932 }
3933 return Matcher::max_vector_size(bt);
3934 }
3935
3936 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3937 return -1;
3938 }
3939
3940 // Vector ideal reg corresponding to specified size in bytes
3941 uint Matcher::vector_ideal_reg(int size) {
3942 assert(MaxVectorSize >= size, "");
3943 switch(size) {
3944 case 4: return Op_VecS;
3945 case 8: return Op_VecD;
3946 case 16: return Op_VecX;
3947 case 32: return Op_VecY;
3948 case 64: return Op_VecZ;
3949 }
3950 ShouldNotReachHere();
3951 return 0;
3952 }
3953
3954 // Check for shift by small constant as well
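// (Illustrative: an index expression such as base + (i << 2) maps the shift onto the SIB scale
// field (1/2/4/8), so shifts by constants 0..3 can be folded into the addressing mode instead of
// being computed into a register.)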
3955 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3956 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3957 shift->in(2)->get_int() <= 3 &&
3958 // Are there other uses besides address expressions?
3959 !matcher->is_visited(shift)) {
3960 address_visited.set(shift->_idx); // Flag as address_visited
3961 mstack.push(shift->in(2), Matcher::Visit);
3962 Node *conv = shift->in(1);
3963 // Allow the Matcher to match the rule which bypasses the
3964 // ConvI2L operation for an array index on LP64
3965 // if the index value is positive.
3966 if (conv->Opcode() == Op_ConvI2L &&
3967 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3968 // Are there other uses besides address expressions?
3969 !matcher->is_visited(conv)) {
3970 address_visited.set(conv->_idx); // Flag as address_visited
3971 mstack.push(conv->in(1), Matcher::Pre_Visit);
3972 } else {
3973 mstack.push(conv, Matcher::Pre_Visit);
3974 }
3975 return true;
3976 }
3977 return false;
3978 }
3979
3980 // This function identifies sub-graphs in which a 'load' node is
3981 // input to two different nodes, and such that it can be matched
3982 // with BMI instructions like blsi, blsr, etc.
3983 // Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3984 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3985 // refers to the same node.
3986 //
3987 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3988 // This is a temporary solution until we make DAGs expressible in ADL.
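// Illustrative use (see is_bmi_pattern below): bmii.match(Op_AndI, -1, Op_SubI, 1, 0) asks whether
// the graph has the shape (AndI (SubI 0 load) load), i.e. the BLSI pattern x & -x, where AndI is
// treated as commutative (-1) and the constant 0 must be input 1 of the SubI.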
3989 template<typename ConType>
3990 class FusedPatternMatcher {
3991 Node* _op1_node;
3992 Node* _mop_node;
3993 int _con_op;
3994
3995 static int match_next(Node* n, int next_op, int next_op_idx) {
3996 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3997 return -1;
3998 }
3999
4000 if (next_op_idx == -1) { // n is commutative, try rotations
4001 if (n->in(1)->Opcode() == next_op) {
4002 return 1;
4003 } else if (n->in(2)->Opcode() == next_op) {
4004 return 2;
4005 }
4006 } else {
4007 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
4008 if (n->in(next_op_idx)->Opcode() == next_op) {
4009 return next_op_idx;
4010 }
4011 }
4012 return -1;
4013 }
4014
4015 public:
4016 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4017 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4018
4019 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4020 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4021 typename ConType::NativeType con_value) {
4022 if (_op1_node->Opcode() != op1) {
4023 return false;
4024 }
4025 if (_mop_node->outcnt() > 2) {
4026 return false;
4027 }
4028 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4029 if (op1_op2_idx == -1) {
4030 return false;
4031 }
4032 // Memory operation must be the other edge
4033 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4034
4035 // Check that the mop node is really what we want
4036 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4037 Node* op2_node = _op1_node->in(op1_op2_idx);
4038 if (op2_node->outcnt() > 1) {
4039 return false;
4040 }
4041 assert(op2_node->Opcode() == op2, "Should be");
4042 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4043 if (op2_con_idx == -1) {
4044 return false;
4045 }
4046 // Memory operation must be the other edge
4047 int op2_mop_idx = (op2_con_idx & 1) + 1;
4048 // Check that the memory operation is the same node
4049 if (op2_node->in(op2_mop_idx) == _mop_node) {
4050 // Now check the constant
4051 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4052 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4053 return true;
4054 }
4055 }
4056 }
4057 return false;
4058 }
4059 };
4060
4061 static bool is_bmi_pattern(Node* n, Node* m) {
4062 assert(UseBMI1Instructions, "sanity");
4063 if (n != nullptr && m != nullptr) {
4064 if (m->Opcode() == Op_LoadI) {
4065 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4066 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4067 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4068 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4069 } else if (m->Opcode() == Op_LoadL) {
4070 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4071 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4072 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4073 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4074 }
4075 }
4076 return false;
4077 }
4078
4079 // Should the matcher clone input 'm' of node 'n'?
4080 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4081 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4082 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4083 mstack.push(m, Visit);
4084 return true;
4085 }
4086 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4087 mstack.push(m, Visit); // m = ShiftCntV
4088 return true;
4089 }
4090 if (is_encode_and_store_pattern(n, m)) {
4091 mstack.push(m, Visit);
4092 return true;
4093 }
4094 return false;
4095 }
4096
4097 // Should the Matcher clone shifts on addressing modes, expecting them
4098 // to be subsumed into complex addressing expressions or compute them
4099 // into registers?
4100 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4101 Node *off = m->in(AddPNode::Offset);
4102 if (off->is_Con()) {
4103 address_visited.test_set(m->_idx); // Flag as address_visited
4104 Node *adr = m->in(AddPNode::Address);
4105
4106 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4107 // AtomicAdd is not an addressing expression.
4108 // Cheap to find it by looking for screwy base.
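// (Illustrative: (AddP (AddP base (LShiftX idx con)) disp) with disp fitting in an immL32 folds
// into a single [base + idx*scale + disp32] memory operand.)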
4109 if (adr->is_AddP() &&
4110 !adr->in(AddPNode::Base)->is_top() &&
4111 !adr->in(AddPNode::Offset)->is_Con() &&
4112 off->get_long() == (int) (off->get_long()) && // immL32
4113 // Are there other uses besides address expressions?
4114 !is_visited(adr)) {
4115 address_visited.set(adr->_idx); // Flag as address_visited
4116 Node *shift = adr->in(AddPNode::Offset);
4117 if (!clone_shift(shift, this, mstack, address_visited)) {
4118 mstack.push(shift, Pre_Visit);
4119 }
4120 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4121 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4122 } else {
4123 mstack.push(adr, Pre_Visit);
4124 }
4125
4126 // Clone X+offset as it also folds into most addressing expressions
4127 mstack.push(off, Visit);
4128 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4129 return true;
4130 } else if (clone_shift(off, this, mstack, address_visited)) {
4131 address_visited.test_set(m->_idx); // Flag as address_visited
4132 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4133 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4134 return true;
4135 }
4136 return false;
4137 }
4138
4139 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4140 switch (bt) {
4141 case BoolTest::eq:
4142 return Assembler::eq;
4143 case BoolTest::ne:
4144 return Assembler::neq;
4145 case BoolTest::le:
4146 case BoolTest::ule:
4147 return Assembler::le;
4148 case BoolTest::ge:
4149 case BoolTest::uge:
4150 return Assembler::nlt;
4151 case BoolTest::lt:
4152 case BoolTest::ult:
4153 return Assembler::lt;
4154 case BoolTest::gt:
4155 case BoolTest::ugt:
4156 return Assembler::nle;
4157 default : ShouldNotReachHere(); return Assembler::_false;
4158 }
4159 }
4160
4161 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4162 switch (bt) {
4163 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4164 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4165 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4166 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4167 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4168 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4169 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4170 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4171 }
4172 }
4173
4174 // Helper methods for MachSpillCopyNode::implementation().
4175 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4176 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4177 assert(ireg == Op_VecS || // 32bit vector
4178 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4179 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4180 "no non-adjacent vector moves" );
4181 if (masm) {
4182 switch (ireg) {
4183 case Op_VecS: // copy whole register
4184 case Op_VecD:
4185 case Op_VecX:
4186 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4187 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4188 } else {
4189 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4190 }
4191 break;
4192 case Op_VecY:
4193 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4194 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4195 } else {
4196 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4197 }
4198 break;
4199 case Op_VecZ:
4200 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4201 break;
4202 default:
4203 ShouldNotReachHere();
4204 }
4205 #ifndef PRODUCT
4206 } else {
4207 switch (ireg) {
4208 case Op_VecS:
4209 case Op_VecD:
4210 case Op_VecX:
4211 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4212 break;
4213 case Op_VecY:
4214 case Op_VecZ:
4215 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4216 break;
4217 default:
4218 ShouldNotReachHere();
4219 }
4220 #endif
4221 }
4222 }
4223
4224 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4225 int stack_offset, int reg, uint ireg, outputStream* st) {
4226 if (masm) {
4227 if (is_load) {
4228 switch (ireg) {
4229 case Op_VecS:
4230 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4231 break;
4232 case Op_VecD:
4233 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4234 break;
4235 case Op_VecX:
4236 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4237 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4238 } else {
4239 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4240 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4241 }
4242 break;
4243 case Op_VecY:
4244 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4245 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4246 } else {
4247 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4248 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4249 }
4250 break;
4251 case Op_VecZ:
4252 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4253 break;
4254 default:
4255 ShouldNotReachHere();
4256 }
4257 } else { // store
4258 switch (ireg) {
4259 case Op_VecS:
4260 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4261 break;
4262 case Op_VecD:
4263 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4264 break;
4265 case Op_VecX:
4266 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4267 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4268 }
4269 else {
4270 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4271 }
4272 break;
4273 case Op_VecY:
4274 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4275 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4276 }
4277 else {
4278 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4279 }
4280 break;
4281 case Op_VecZ:
4282 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4283 break;
4284 default:
4285 ShouldNotReachHere();
4286 }
4287 }
4288 #ifndef PRODUCT
4289 } else {
4290 if (is_load) {
4291 switch (ireg) {
4292 case Op_VecS:
4293 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4294 break;
4295 case Op_VecD:
4296 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4297 break;
4298 case Op_VecX:
4299 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4300 break;
4301 case Op_VecY:
4302 case Op_VecZ:
4303 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4304 break;
4305 default:
4306 ShouldNotReachHere();
4307 }
4308 } else { // store
4309 switch (ireg) {
4310 case Op_VecS:
4311 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4312 break;
4313 case Op_VecD:
4314 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4315 break;
4316 case Op_VecX:
4317 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4318 break;
4319 case Op_VecY:
4320 case Op_VecZ:
4321 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4322 break;
4323 default:
4324 ShouldNotReachHere();
4325 }
4326 }
4327 #endif
4328 }
4329 }
4330
4331 template <class T>
4332 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4333 int size = type2aelembytes(bt) * len;
4334 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4335 for (int i = 0; i < len; i++) {
4336 int offset = i * type2aelembytes(bt);
4337 switch (bt) {
4338 case T_BYTE: val->at(i) = con; break;
4339 case T_SHORT: {
4340 jshort c = con;
4341 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4342 break;
4343 }
4344 case T_INT: {
4345 jint c = con;
4346 memcpy(val->adr_at(offset), &c, sizeof(jint));
4347 break;
4348 }
4349 case T_LONG: {
4350 jlong c = con;
4351 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4352 break;
4353 }
4354 case T_FLOAT: {
4355 jfloat c = con;
4356 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4357 break;
4358 }
4359 case T_DOUBLE: {
4360 jdouble c = con;
4361 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4362 break;
4363 }
4364 default: assert(false, "%s", type2name(bt));
4365 }
4366 }
4367 return val;
4368 }
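// Illustrative use (little-endian, as on x86): vreplicate_imm<jint>(T_INT, 0x01020304, 4) returns a
// 16-byte array holding the byte pattern 04 03 02 01 repeated four times.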
4369
4370 static inline jlong high_bit_set(BasicType bt) {
4371 switch (bt) {
4372 case T_BYTE: return 0x8080808080808080;
4373 case T_SHORT: return 0x8000800080008000;
4374 case T_INT: return 0x8000000080000000;
4375 case T_LONG: return 0x8000000000000000;
4376 default:
4377 ShouldNotReachHere();
4378 return 0;
4379 }
4380 }
4381
4382 #ifndef PRODUCT
4383 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4384 st->print("nop \t# %d bytes pad for loops and calls", _count);
4385 }
4386 #endif
4387
4388 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4389 __ nop(_count);
4390 }
4391
4392 uint MachNopNode::size(PhaseRegAlloc*) const {
4393 return _count;
4394 }
4395
4396 #ifndef PRODUCT
4397 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4398 st->print("# breakpoint");
4399 }
4400 #endif
4401
4402 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4403 __ int3();
4404 }
4405
4406 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4407 return MachNode::size(ra_);
4408 }
4409
4410 %}
4411
4412 //----------ENCODING BLOCK-----------------------------------------------------
4413 // This block specifies the encoding classes used by the compiler to
4414 // output byte streams. Encoding classes are parameterized macros
4415 // used by Machine Instruction Nodes in order to generate the bit
4416 // encoding of the instruction. Operands specify their base encoding
4417 // interface with the interface keyword. There are currently
4418 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
4419 // COND_INTER. REG_INTER causes an operand to generate a function
4420 // which returns its register number when queried. CONST_INTER causes
4421 // an operand to generate a function which returns the value of the
4422 // constant when queried. MEMORY_INTER causes an operand to generate
4423 // four functions which return the Base Register, the Index Register,
4424 // the Scale Value, and the Offset Value of the operand when queried.
4425 // COND_INTER causes an operand to generate six functions which return
4426 // the encoding code (i.e., the encoding bits for the instruction)
4427 // associated with each basic boolean condition for a conditional
4428 // instruction.
4429 //
4430 // Instructions specify two basic values for encoding. Again, a
4431 // function is available to check if the constant displacement is an
4432 // oop. They use the ins_encode keyword to specify their encoding
4433 // classes (which must be a sequence of enc_class names, and their
4434 // parameters, specified in the encoding block), and they use the
4435 // opcode keyword to specify, in order, their primary, secondary, and
4436 // tertiary opcode. Only the opcode sections which a particular
4437 // instruction needs for encoding need to be specified.
4438 encode %{
4439 enc_class cdql_enc(no_rax_rdx_RegI div)
4440 %{
4441 // Full implementation of Java idiv and irem; checks for
4442 // special case as described in JVM spec., p.243 & p.271.
4443 //
4444 // normal case special case
4445 //
4446 // input : rax: dividend min_int
4447 // reg: divisor -1
4448 //
4449 // output: rax: quotient (= rax idiv reg) min_int
4450 // rdx: remainder (= rax irem reg) 0
4451 //
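    // The software check is needed because executing idiv with dividend ==
    // min_int and divisor == -1 would raise a hardware #DE (divide error)
    // fault: the mathematical quotient 2^31 does not fit in 32 bits.
    //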
4452 // Code sequence:
4453 //
4454 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4455 // 5: 75 07/08 jne e <normal>
4456 // 7: 33 d2 xor %edx,%edx
4457 // [div >= 8 -> offset + 1]
4458 // [REX_B]
4459 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4460 // c: 74 03/04 je 11 <done>
4461 // 000000000000000e <normal>:
4462 // e: 99 cltd
4463 // [div >= 8 -> offset + 1]
4464 // [REX_B]
4465 // f: f7 f9 idiv $div
4466 // 0000000000000011 <done>:
4467 Label normal;
4468 Label done;
4469
4470 // cmp $0x80000000,%eax
4471 __ cmpl(as_Register(RAX_enc), 0x80000000);
4472
4473 // jne e <normal>
4474 __ jccb(Assembler::notEqual, normal);
4475
4476 // xor %edx,%edx
4477 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4478
4479 // cmp $0xffffffffffffffff,$div
4480 __ cmpl($div$$Register, -1);
4481
4482 // je 11 <done>
4483 __ jccb(Assembler::equal, done);
4484
4485 // <normal>
4486 // cltd
4487 __ bind(normal);
4488 __ cdql();
4489
4490 // idivl
4491 // <done>
4492 __ idivl($div$$Register);
4493 __ bind(done);
4494 %}
4495
4496 enc_class cdqq_enc(no_rax_rdx_RegL div)
4497 %{
4498 // Full implementation of Java ldiv and lrem; checks for
4499 // special case as described in JVM spec., p.243 & p.271.
4500 //
4501 // normal case special case
4502 //
4503 // input : rax: dividend min_long
4504 // reg: divisor -1
4505 //
4506 // output: rax: quotient (= rax idiv reg) min_long
4507 // rdx: remainder (= rax irem reg) 0
4508 //
4509 // Code sequence:
4510 //
4511 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4512 // 7: 00 00 80
4513 // a: 48 39 d0 cmp %rdx,%rax
4514 // d: 75 08 jne 17 <normal>
4515 // f: 33 d2 xor %edx,%edx
4516 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4517 // 15: 74 05 je 1c <done>
4518 // 0000000000000017 <normal>:
4519 // 17: 48 99 cqto
4520 // 19: 48 f7 f9 idiv $div
4521 // 000000000000001c <done>:
4522 Label normal;
4523 Label done;
4524
4525 // mov $0x8000000000000000,%rdx
4526 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4527
4528 // cmp %rdx,%rax
4529 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4530
4531 // jne 17 <normal>
4532 __ jccb(Assembler::notEqual, normal);
4533
4534 // xor %edx,%edx
4535 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4536
4537 // cmp $0xffffffffffffffff,$div
4538 __ cmpq($div$$Register, -1);
4539
4540 // je 1c <done>
4541 __ jccb(Assembler::equal, done);
4542
4543 // <normal>
4544 // cqto
4545 __ bind(normal);
4546 __ cdqq();
4547
4548 // idivq
4549 // <done>
4550 __ idivq($div$$Register);
4551 __ bind(done);
4552 %}
4553
4554 enc_class clear_avx %{
4555 DEBUG_ONLY(int off0 = __ offset());
4556 if (generate_vzeroupper(Compile::current())) {
4557 // Clear the upper bits of the YMM registers to avoid the AVX <-> SSE
4558 // transition penalty during the call; this is only needed when the current
4559 // compiled code uses wide vectors.
4560 __ vzeroupper();
4561 }
4562 DEBUG_ONLY(int off1 = __ offset());
4563 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4564 %}
4565
4566 enc_class Java_To_Runtime(method meth) %{
4567 __ lea(r10, RuntimeAddress((address)$meth$$method));
4568 __ call(r10);
4569 __ post_call_nop();
4570 %}
4571
4572 enc_class Java_Static_Call(method meth)
4573 %{
4574 // JAVA STATIC CALL
4575 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4576 // determine who we intended to call.
4577 if (!_method) {
4578 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4579 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4580 // The NOP here is purely to ensure that eliding a call to
4581 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4582 __ addr_nop_5();
4583 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4584 } else {
4585 int method_index = resolved_method_index(masm);
4586 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4587 : static_call_Relocation::spec(method_index);
4588 address mark = __ pc();
4589 int call_offset = __ offset();
4590 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4591 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4592 // Calls of the same statically bound method can share
4593 // a stub to the interpreter.
4594 __ code()->shared_stub_to_interp_for(_method, call_offset);
4595 } else {
4596 // Emit stubs for static call.
4597 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4598 __ clear_inst_mark();
4599 if (stub == nullptr) {
4600 ciEnv::current()->record_failure("CodeCache is full");
4601 return;
4602 }
4603 }
4604 }
4605 __ post_call_nop();
4606 %}
4607
4608 enc_class Java_Dynamic_Call(method meth) %{
4609 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4610 __ post_call_nop();
4611 %}
4612
4613 enc_class call_epilog %{
4614 if (VerifyStackAtCalls) {
4615 // Check that stack depth is unchanged: find majik cookie on stack
4616 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4617 Label L;
4618 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4619 __ jccb(Assembler::equal, L);
4620 // Die if stack mismatch
4621 __ int3();
4622 __ bind(L);
4623 }
4624 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4625 // The last return value is not set by the callee but used to pass the null marker to compiled code.
4626 // Search for the corresponding projection, get the register and emit code that initializes it.
4627 uint con = (tf()->range_cc()->cnt() - 1);
4628 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4629 ProjNode* proj = fast_out(i)->as_Proj();
4630 if (proj->_con == con) {
4631 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4632 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4633 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4634 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4635 __ testq(rax, rax);
4636 __ setb(Assembler::notZero, toReg);
4637 __ movzbl(toReg, toReg);
4638 if (reg->is_stack()) {
4639 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4640 __ movq(Address(rsp, st_off), toReg);
4641 }
4642 break;
4643 }
4644 }
4645 if (return_value_is_used()) {
4646 // An inline type is returned as fields in multiple registers.
4647 // Rax contains either an oop (if the inline type is buffered) or a pointer
4648 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4649 // if that lowest bit is set, so that C2 can use the oop after null checking.
4650 // rax &= (rax & 1) - 1
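      // If the lowest bit is 1, (rax & 1) - 1 == 0 and the and-mask clears rax;
      // if the lowest bit is 0, (rax & 1) - 1 == -1 (all ones) and rax is unchanged.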
4651 __ movptr(rscratch1, rax);
4652 __ andptr(rscratch1, 0x1);
4653 __ subptr(rscratch1, 0x1);
4654 __ andptr(rax, rscratch1);
4655 }
4656 }
4657 %}
4658
4659 %}
4660
4661 //----------FRAME--------------------------------------------------------------
4662 // Definition of frame structure and management information.
4663 //
4664 // S T A C K L A Y O U T Allocators stack-slot number
4665 // | (to get allocators register number
4666 // G Owned by | | v add OptoReg::stack0())
4667 // r CALLER | |
4668 // o | +--------+ pad to even-align allocators stack-slot
4669 // w V | pad0 | numbers; owned by CALLER
4670 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4671 // h ^ | in | 5
4672 // | | args | 4 Holes in incoming args owned by SELF
4673 // | | | | 3
4674 // | | +--------+
4675 // V | | old out| Empty on Intel, window on Sparc
4676 // | old |preserve| Must be even aligned.
4677 // | SP-+--------+----> Matcher::_old_SP, even aligned
4678 // | | in | 3 area for Intel ret address
4679 // Owned by |preserve| Empty on Sparc.
4680 // SELF +--------+
4681 // | | pad2 | 2 pad to align old SP
4682 // | +--------+ 1
4683 // | | locks | 0
4684 // | +--------+----> OptoReg::stack0(), even aligned
4685 // | | pad1 | 11 pad to align new SP
4686 // | +--------+
4687 // | | | 10
4688 // | | spills | 9 spills
4689 // V | | 8 (pad0 slot for callee)
4690 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4691 // ^ | out | 7
4692 // | | args | 6 Holes in outgoing args owned by CALLEE
4693 // Owned by +--------+
4694 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4695 // | new |preserve| Must be even-aligned.
4696 // | SP-+--------+----> Matcher::_new_SP, even aligned
4697 // | | |
4698 //
4699 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4700 // known from SELF's arguments and the Java calling convention.
4701 // Region 6-7 is determined per call site.
4702 // Note 2: If the calling convention leaves holes in the incoming argument
4703 // area, those holes are owned by SELF. Holes in the outgoing area
4704 // are owned by the CALLEE. Holes should not be necessary in the
4705 // incoming area, as the Java calling convention is completely under
4706 // the control of the AD file. Doubles can be sorted and packed to
4707 // avoid holes. Holes in the outgoing arguments may be necessary for
4708 // varargs C calling conventions.
4709 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4710 // even aligned with pad0 as needed.
4711 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4712 // region 6-11 is even aligned; it may be padded out more so that
4713 // the region from SP to FP meets the minimum stack alignment.
4714 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4715 // alignment. Region 11, pad1, may be dynamically extended so that
4716 // SP meets the minimum alignment.
4717
4718 frame
4719 %{
4720 // These three registers define part of the calling convention
4721 // between compiled code and the interpreter.
4722 inline_cache_reg(RAX); // Inline Cache Register
4723
4724 // Optional: name the operand used by cisc-spilling to access
4725 // [stack_pointer + offset]
4726 cisc_spilling_operand_name(indOffset32);
4727
4728 // Number of stack slots consumed by locking an object
4729 sync_stack_slots(2);
4730
4731 // Compiled code's Frame Pointer
4732 frame_pointer(RSP);
4733
4734 // Stack alignment requirement
4735 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4736
4737 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4738 // for calls to C. Supports the var-args backing area for register parms.
4739 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4740
4741 // The after-PROLOG location of the return address. Location of
4742 // return address specifies a type (REG or STACK) and a number
4743 // representing the register number (i.e. - use a register name) or
4744 // stack slot.
4745 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4746 // Otherwise, it is above the locks and verification slot and the alignment word.
4747 return_addr(STACK - 2 +
4748 align_up((Compile::current()->in_preserve_stack_slots() +
4749 Compile::current()->fixed_slots()),
4750 stack_alignment_in_slots()));
4751
4752 // Location of compiled Java return values. Same as C for now.
4753 return_value
4754 %{
4755 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4756 "only return normal values");
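  // For example, an int result is returned in RAX (hi slot is OptoReg::Bad),
  // a long in RAX/RAX_H, and a double in XMM0/XMM0b, per the tables below.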
4757
4758 static const int lo[Op_RegL + 1] = {
4759 0,
4760 0,
4761 RAX_num, // Op_RegN
4762 RAX_num, // Op_RegI
4763 RAX_num, // Op_RegP
4764 XMM0_num, // Op_RegF
4765 XMM0_num, // Op_RegD
4766 RAX_num // Op_RegL
4767 };
4768 static const int hi[Op_RegL + 1] = {
4769 0,
4770 0,
4771 OptoReg::Bad, // Op_RegN
4772 OptoReg::Bad, // Op_RegI
4773 RAX_H_num, // Op_RegP
4774 OptoReg::Bad, // Op_RegF
4775 XMM0b_num, // Op_RegD
4776 RAX_H_num // Op_RegL
4777 };
4778 // Excluded flags and vector registers.
4779 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4780 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4781 %}
4782 %}
4783
4784 //----------ATTRIBUTES---------------------------------------------------------
4785 //----------Operand Attributes-------------------------------------------------
4786 op_attrib op_cost(0); // Required cost attribute
4787
4788 //----------Instruction Attributes---------------------------------------------
4789 ins_attrib ins_cost(100); // Required cost attribute
4790 ins_attrib ins_size(8); // Required size attribute (in bits)
4791 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4792 // a non-matching short branch variant
4793 // of some long branch?
4794 ins_attrib ins_alignment(1); // Required alignment attribute (must
4795 // be a power of 2) specifies the
4796 // alignment that some part of the
4797 // instruction (not necessarily the
4798 // start) requires. If > 1, a
4799 // compute_padding() function must be
4800 // provided for the instruction
4801
4802 // Whether this node is expanded during code emission into a sequence of
4803 // instructions and the first instruction can perform an implicit null check.
4804 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4805
4806 //----------OPERANDS-----------------------------------------------------------
4807 // Operand definitions must precede instruction definitions for correct parsing
4808 // in the ADLC because operands constitute user defined types which are used in
4809 // instruction definitions.
4810
4811 //----------Simple Operands----------------------------------------------------
4812 // Immediate Operands
4813 // Integer Immediate
4814 operand immI()
4815 %{
4816 match(ConI);
4817
4818 op_cost(10);
4819 format %{ %}
4820 interface(CONST_INTER);
4821 %}
4822
4823 // Constant for test vs zero
4824 operand immI_0()
4825 %{
4826 predicate(n->get_int() == 0);
4827 match(ConI);
4828
4829 op_cost(0);
4830 format %{ %}
4831 interface(CONST_INTER);
4832 %}
4833
4834 // Constant for increment
4835 operand immI_1()
4836 %{
4837 predicate(n->get_int() == 1);
4838 match(ConI);
4839
4840 op_cost(0);
4841 format %{ %}
4842 interface(CONST_INTER);
4843 %}
4844
4845 // Constant for decrement
4846 operand immI_M1()
4847 %{
4848 predicate(n->get_int() == -1);
4849 match(ConI);
4850
4851 op_cost(0);
4852 format %{ %}
4853 interface(CONST_INTER);
4854 %}
4855
4856 operand immI_2()
4857 %{
4858 predicate(n->get_int() == 2);
4859 match(ConI);
4860
4861 op_cost(0);
4862 format %{ %}
4863 interface(CONST_INTER);
4864 %}
4865
4866 operand immI_4()
4867 %{
4868 predicate(n->get_int() == 4);
4869 match(ConI);
4870
4871 op_cost(0);
4872 format %{ %}
4873 interface(CONST_INTER);
4874 %}
4875
4876 operand immI_8()
4877 %{
4878 predicate(n->get_int() == 8);
4879 match(ConI);
4880
4881 op_cost(0);
4882 format %{ %}
4883 interface(CONST_INTER);
4884 %}
4885
4886 // Valid scale values for addressing modes
4887 operand immI2()
4888 %{
4889 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4890 match(ConI);
4891
4892 format %{ %}
4893 interface(CONST_INTER);
4894 %}
4895
4896 operand immU7()
4897 %{
4898 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4899 match(ConI);
4900
4901 op_cost(5);
4902 format %{ %}
4903 interface(CONST_INTER);
4904 %}
4905
4906 operand immI8()
4907 %{
4908 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4909 match(ConI);
4910
4911 op_cost(5);
4912 format %{ %}
4913 interface(CONST_INTER);
4914 %}
4915
4916 operand immU8()
4917 %{
4918 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4919 match(ConI);
4920
4921 op_cost(5);
4922 format %{ %}
4923 interface(CONST_INTER);
4924 %}
4925
4926 operand immI16()
4927 %{
4928 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4929 match(ConI);
4930
4931 op_cost(10);
4932 format %{ %}
4933 interface(CONST_INTER);
4934 %}
4935
4936 // Int Immediate non-negative
4937 operand immU31()
4938 %{
4939 predicate(n->get_int() >= 0);
4940 match(ConI);
4941
4942 op_cost(0);
4943 format %{ %}
4944 interface(CONST_INTER);
4945 %}
4946
4947 // Pointer Immediate
4948 operand immP()
4949 %{
4950 match(ConP);
4951
4952 op_cost(10);
4953 format %{ %}
4954 interface(CONST_INTER);
4955 %}
4956
4957 // Null Pointer Immediate
4958 operand immP0()
4959 %{
4960 predicate(n->get_ptr() == 0);
4961 match(ConP);
4962
4963 op_cost(5);
4964 format %{ %}
4965 interface(CONST_INTER);
4966 %}
4967
4968 // Pointer Immediate
4969 operand immN() %{
4970 match(ConN);
4971
4972 op_cost(10);
4973 format %{ %}
4974 interface(CONST_INTER);
4975 %}
4976
4977 operand immNKlass() %{
4978 match(ConNKlass);
4979
4980 op_cost(10);
4981 format %{ %}
4982 interface(CONST_INTER);
4983 %}
4984
4985 // Null Pointer Immediate
4986 operand immN0() %{
4987 predicate(n->get_narrowcon() == 0);
4988 match(ConN);
4989
4990 op_cost(5);
4991 format %{ %}
4992 interface(CONST_INTER);
4993 %}
4994
4995 operand immP31()
4996 %{
4997 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4998 && (n->get_ptr() >> 31) == 0);
4999 match(ConP);
5000
5001 op_cost(5);
5002 format %{ %}
5003 interface(CONST_INTER);
5004 %}
5005
5006
5007 // Long Immediate
5008 operand immL()
5009 %{
5010 match(ConL);
5011
5012 op_cost(20);
5013 format %{ %}
5014 interface(CONST_INTER);
5015 %}
5016
5017 // Long Immediate 8-bit
5018 operand immL8()
5019 %{
5020 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
5021 match(ConL);
5022
5023 op_cost(5);
5024 format %{ %}
5025 interface(CONST_INTER);
5026 %}
5027
5028 // Long Immediate 32-bit unsigned
5029 operand immUL32()
5030 %{
5031 predicate(n->get_long() == (unsigned int) (n->get_long()));
5032 match(ConL);
5033
5034 op_cost(10);
5035 format %{ %}
5036 interface(CONST_INTER);
5037 %}
5038
5039 // Long Immediate 32-bit signed
5040 operand immL32()
5041 %{
5042 predicate(n->get_long() == (int) (n->get_long()));
5043 match(ConL);
5044
5045 op_cost(15);
5046 format %{ %}
5047 interface(CONST_INTER);
5048 %}
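// Note the distinction: immUL32 matches longs whose value fits in a
// zero-extended 32-bit immediate, while immL32 matches those that fit in a
// sign-extended 32-bit immediate (e.g. -1 matches immL32 but not immUL32).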
5049
5050 operand immL_Pow2()
5051 %{
5052 predicate(is_power_of_2((julong)n->get_long()));
5053 match(ConL);
5054
5055 op_cost(15);
5056 format %{ %}
5057 interface(CONST_INTER);
5058 %}
5059
5060 operand immL_NotPow2()
5061 %{
5062 predicate(is_power_of_2((julong)~n->get_long()));
5063 match(ConL);
5064
5065 op_cost(15);
5066 format %{ %}
5067 interface(CONST_INTER);
5068 %}
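// immL_Pow2 matches constants with exactly one bit set (e.g. 0x100), while
// immL_NotPow2 matches constants with exactly one bit clear (e.g.
// 0xFFFFFFFFFFFFFFF7, whose complement 0x8 is a power of two).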
5069
5070 // Long Immediate zero
5071 operand immL0()
5072 %{
5073 predicate(n->get_long() == 0L);
5074 match(ConL);
5075
5076 op_cost(10);
5077 format %{ %}
5078 interface(CONST_INTER);
5079 %}
5080
5081 // Constant for increment
5082 operand immL1()
5083 %{
5084 predicate(n->get_long() == 1);
5085 match(ConL);
5086
5087 format %{ %}
5088 interface(CONST_INTER);
5089 %}
5090
5091 // Constant for decrement
5092 operand immL_M1()
5093 %{
5094 predicate(n->get_long() == -1);
5095 match(ConL);
5096
5097 format %{ %}
5098 interface(CONST_INTER);
5099 %}
5100
5101 // Long Immediate: low 32-bit mask
5102 operand immL_32bits()
5103 %{
5104 predicate(n->get_long() == 0xFFFFFFFFL);
5105 match(ConL);
5106 op_cost(20);
5107
5108 format %{ %}
5109 interface(CONST_INTER);
5110 %}
5111
5112 // Int Immediate: 2^n-1, positive
5113 operand immI_Pow2M1()
5114 %{
5115 predicate((n->get_int() > 0)
5116 && is_power_of_2((juint)n->get_int() + 1));
5117 match(ConI);
5118
5119 op_cost(20);
5120 format %{ %}
5121 interface(CONST_INTER);
5122 %}
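// Typical matches are mask constants such as 0x7F, 0xFF, 0xFFFF and
// 0x7FFFFFFF, i.e. positive values of the form 2^n - 1.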
5123
5124 // Float Immediate zero
5125 operand immF0()
5126 %{
5127 predicate(jint_cast(n->getf()) == 0);
5128 match(ConF);
5129
5130 op_cost(5);
5131 format %{ %}
5132 interface(CONST_INTER);
5133 %}
5134
5135 // Float Immediate
5136 operand immF()
5137 %{
5138 match(ConF);
5139
5140 op_cost(15);
5141 format %{ %}
5142 interface(CONST_INTER);
5143 %}
5144
5145 // Half Float Immediate
5146 operand immH()
5147 %{
5148 match(ConH);
5149
5150 op_cost(15);
5151 format %{ %}
5152 interface(CONST_INTER);
5153 %}
5154
5155 // Double Immediate zero
5156 operand immD0()
5157 %{
5158 predicate(jlong_cast(n->getd()) == 0);
5159 match(ConD);
5160
5161 op_cost(5);
5162 format %{ %}
5163 interface(CONST_INTER);
5164 %}
5165
5166 // Double Immediate
5167 operand immD()
5168 %{
5169 match(ConD);
5170
5171 op_cost(15);
5172 format %{ %}
5173 interface(CONST_INTER);
5174 %}
5175
5176 // Immediates for special shifts (sign extend)
5177
5178 // Constants for increment
5179 operand immI_16()
5180 %{
5181 predicate(n->get_int() == 16);
5182 match(ConI);
5183
5184 format %{ %}
5185 interface(CONST_INTER);
5186 %}
5187
5188 operand immI_24()
5189 %{
5190 predicate(n->get_int() == 24);
5191 match(ConI);
5192
5193 format %{ %}
5194 interface(CONST_INTER);
5195 %}
5196
5197 // Constant for byte-wide masking
5198 operand immI_255()
5199 %{
5200 predicate(n->get_int() == 255);
5201 match(ConI);
5202
5203 format %{ %}
5204 interface(CONST_INTER);
5205 %}
5206
5207 // Constant for short-wide masking
5208 operand immI_65535()
5209 %{
5210 predicate(n->get_int() == 65535);
5211 match(ConI);
5212
5213 format %{ %}
5214 interface(CONST_INTER);
5215 %}
5216
5217 // Constant for byte-wide masking
5218 operand immL_255()
5219 %{
5220 predicate(n->get_long() == 255);
5221 match(ConL);
5222
5223 format %{ %}
5224 interface(CONST_INTER);
5225 %}
5226
5227 // Constant for short-wide masking
5228 operand immL_65535()
5229 %{
5230 predicate(n->get_long() == 65535);
5231 match(ConL);
5232
5233 format %{ %}
5234 interface(CONST_INTER);
5235 %}
5236
5237 // AOT Runtime Constants Address
5238 operand immAOTRuntimeConstantsAddress()
5239 %{
5240 // Check if the address is in the range of AOT Runtime Constants
5241 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5242 match(ConP);
5243
5244 op_cost(0);
5245 format %{ %}
5246 interface(CONST_INTER);
5247 %}
5248
5249 operand kReg()
5250 %{
5251 constraint(ALLOC_IN_RC(vectmask_reg));
5252 match(RegVectMask);
5253 format %{%}
5254 interface(REG_INTER);
5255 %}
5256
5257 // Register Operands
5258 // Integer Register
5259 operand rRegI()
5260 %{
5261 constraint(ALLOC_IN_RC(int_reg));
5262 match(RegI);
5263
5264 match(rax_RegI);
5265 match(rbx_RegI);
5266 match(rcx_RegI);
5267 match(rdx_RegI);
5268 match(rdi_RegI);
5269
5270 format %{ %}
5271 interface(REG_INTER);
5272 %}
5273
5274 // Special Registers
5275 operand rax_RegI()
5276 %{
5277 constraint(ALLOC_IN_RC(int_rax_reg));
5278 match(RegI);
5279 match(rRegI);
5280
5281 format %{ "RAX" %}
5282 interface(REG_INTER);
5283 %}
5284
5285 // Special Registers
5286 operand rbx_RegI()
5287 %{
5288 constraint(ALLOC_IN_RC(int_rbx_reg));
5289 match(RegI);
5290 match(rRegI);
5291
5292 format %{ "RBX" %}
5293 interface(REG_INTER);
5294 %}
5295
5296 operand rcx_RegI()
5297 %{
5298 constraint(ALLOC_IN_RC(int_rcx_reg));
5299 match(RegI);
5300 match(rRegI);
5301
5302 format %{ "RCX" %}
5303 interface(REG_INTER);
5304 %}
5305
5306 operand rdx_RegI()
5307 %{
5308 constraint(ALLOC_IN_RC(int_rdx_reg));
5309 match(RegI);
5310 match(rRegI);
5311
5312 format %{ "RDX" %}
5313 interface(REG_INTER);
5314 %}
5315
5316 operand rdi_RegI()
5317 %{
5318 constraint(ALLOC_IN_RC(int_rdi_reg));
5319 match(RegI);
5320 match(rRegI);
5321
5322 format %{ "RDI" %}
5323 interface(REG_INTER);
5324 %}
5325
5326 operand no_rax_rdx_RegI()
5327 %{
5328 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5329 match(RegI);
5330 match(rbx_RegI);
5331 match(rcx_RegI);
5332 match(rdi_RegI);
5333
5334 format %{ %}
5335 interface(REG_INTER);
5336 %}
5337
5338 operand no_rbp_r13_RegI()
5339 %{
5340 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5341 match(RegI);
5342 match(rRegI);
5343 match(rax_RegI);
5344 match(rbx_RegI);
5345 match(rcx_RegI);
5346 match(rdx_RegI);
5347 match(rdi_RegI);
5348
5349 format %{ %}
5350 interface(REG_INTER);
5351 %}
5352
5353 // Pointer Register
5354 operand any_RegP()
5355 %{
5356 constraint(ALLOC_IN_RC(any_reg));
5357 match(RegP);
5358 match(rax_RegP);
5359 match(rbx_RegP);
5360 match(rdi_RegP);
5361 match(rsi_RegP);
5362 match(rbp_RegP);
5363 match(r15_RegP);
5364 match(rRegP);
5365
5366 format %{ %}
5367 interface(REG_INTER);
5368 %}
5369
5370 operand rRegP()
5371 %{
5372 constraint(ALLOC_IN_RC(ptr_reg));
5373 match(RegP);
5374 match(rax_RegP);
5375 match(rbx_RegP);
5376 match(rdi_RegP);
5377 match(rsi_RegP);
5378 match(rbp_RegP); // See Q&A below about
5379 match(r15_RegP); // r15_RegP and rbp_RegP.
5380
5381 format %{ %}
5382 interface(REG_INTER);
5383 %}
5384
5385 operand rRegN() %{
5386 constraint(ALLOC_IN_RC(int_reg));
5387 match(RegN);
5388
5389 format %{ %}
5390 interface(REG_INTER);
5391 %}
5392
5393 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5394 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5395 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5396 // The output of an instruction is controlled by the allocator, which respects
5397 // register class masks, not match rules. Unless an instruction mentions
5398 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5399 // by the allocator as an input.
5400 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5401 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5402 // result, RBP is not included in the output of the instruction either.
5403
5404 // This operand is not allowed to use RBP even if
5405 // RBP is not used to hold the frame pointer.
5406 operand no_rbp_RegP()
5407 %{
5408 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5409 match(RegP);
5410 match(rbx_RegP);
5411 match(rsi_RegP);
5412 match(rdi_RegP);
5413
5414 format %{ %}
5415 interface(REG_INTER);
5416 %}
5417
5418 // Special Registers
5419 // Return a pointer value
5420 operand rax_RegP()
5421 %{
5422 constraint(ALLOC_IN_RC(ptr_rax_reg));
5423 match(RegP);
5424 match(rRegP);
5425
5426 format %{ %}
5427 interface(REG_INTER);
5428 %}
5429
5430 // Special Registers
5431 // Return a compressed pointer value
5432 operand rax_RegN()
5433 %{
5434 constraint(ALLOC_IN_RC(int_rax_reg));
5435 match(RegN);
5436 match(rRegN);
5437
5438 format %{ %}
5439 interface(REG_INTER);
5440 %}
5441
5442 // Used in AtomicAdd
5443 operand rbx_RegP()
5444 %{
5445 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5446 match(RegP);
5447 match(rRegP);
5448
5449 format %{ %}
5450 interface(REG_INTER);
5451 %}
5452
5453 operand rsi_RegP()
5454 %{
5455 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5456 match(RegP);
5457 match(rRegP);
5458
5459 format %{ %}
5460 interface(REG_INTER);
5461 %}
5462
5463 operand rbp_RegP()
5464 %{
5465 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5466 match(RegP);
5467 match(rRegP);
5468
5469 format %{ %}
5470 interface(REG_INTER);
5471 %}
5472
5473 // Used in rep stosq
5474 operand rdi_RegP()
5475 %{
5476 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5477 match(RegP);
5478 match(rRegP);
5479
5480 format %{ %}
5481 interface(REG_INTER);
5482 %}
5483
5484 operand r15_RegP()
5485 %{
5486 constraint(ALLOC_IN_RC(ptr_r15_reg));
5487 match(RegP);
5488 match(rRegP);
5489
5490 format %{ %}
5491 interface(REG_INTER);
5492 %}
5493
5494 operand rRegL()
5495 %{
5496 constraint(ALLOC_IN_RC(long_reg));
5497 match(RegL);
5498 match(rax_RegL);
5499 match(rdx_RegL);
5500
5501 format %{ %}
5502 interface(REG_INTER);
5503 %}
5504
5505 // Special Registers
5506 operand no_rax_rdx_RegL()
5507 %{
5508 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5509 match(RegL);
5510 match(rRegL);
5511
5512 format %{ %}
5513 interface(REG_INTER);
5514 %}
5515
5516 operand rax_RegL()
5517 %{
5518 constraint(ALLOC_IN_RC(long_rax_reg));
5519 match(RegL);
5520 match(rRegL);
5521
5522 format %{ "RAX" %}
5523 interface(REG_INTER);
5524 %}
5525
5526 operand rcx_RegL()
5527 %{
5528 constraint(ALLOC_IN_RC(long_rcx_reg));
5529 match(RegL);
5530 match(rRegL);
5531
5532 format %{ %}
5533 interface(REG_INTER);
5534 %}
5535
5536 operand rdx_RegL()
5537 %{
5538 constraint(ALLOC_IN_RC(long_rdx_reg));
5539 match(RegL);
5540 match(rRegL);
5541
5542 format %{ %}
5543 interface(REG_INTER);
5544 %}
5545
5546 operand r11_RegL()
5547 %{
5548 constraint(ALLOC_IN_RC(long_r11_reg));
5549 match(RegL);
5550 match(rRegL);
5551
5552 format %{ %}
5553 interface(REG_INTER);
5554 %}
5555
5556 operand no_rbp_r13_RegL()
5557 %{
5558 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5559 match(RegL);
5560 match(rRegL);
5561 match(rax_RegL);
5562 match(rcx_RegL);
5563 match(rdx_RegL);
5564
5565 format %{ %}
5566 interface(REG_INTER);
5567 %}
5568
5569 // Flags register, used as output of compare instructions
5570 operand rFlagsReg()
5571 %{
5572 constraint(ALLOC_IN_RC(int_flags));
5573 match(RegFlags);
5574
5575 format %{ "RFLAGS" %}
5576 interface(REG_INTER);
5577 %}
5578
5579 // Flags register, used as output of FLOATING POINT compare instructions
5580 operand rFlagsRegU()
5581 %{
5582 constraint(ALLOC_IN_RC(int_flags));
5583 match(RegFlags);
5584
5585 format %{ "RFLAGS_U" %}
5586 interface(REG_INTER);
5587 %}
5588
5589 operand rFlagsRegUCF() %{
5590 constraint(ALLOC_IN_RC(int_flags));
5591 match(RegFlags);
5592 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5593
5594 format %{ "RFLAGS_U_CF" %}
5595 interface(REG_INTER);
5596 %}
5597
5598 operand rFlagsRegUCFE() %{
5599 constraint(ALLOC_IN_RC(int_flags));
5600 match(RegFlags);
5601 predicate(UseAPX && VM_Version::supports_avx10_2());
5602
5603 format %{ "RFLAGS_U_CFE" %}
5604 interface(REG_INTER);
5605 %}
5606
5607 // Float register operands
5608 operand regF() %{
5609 constraint(ALLOC_IN_RC(float_reg));
5610 match(RegF);
5611
5612 format %{ %}
5613 interface(REG_INTER);
5614 %}
5615
5616 // Float register operands
5617 operand legRegF() %{
5618 constraint(ALLOC_IN_RC(float_reg_legacy));
5619 match(RegF);
5620
5621 format %{ %}
5622 interface(REG_INTER);
5623 %}
5624
5625 // Float register operands
5626 operand vlRegF() %{
5627 constraint(ALLOC_IN_RC(float_reg_vl));
5628 match(RegF);
5629
5630 format %{ %}
5631 interface(REG_INTER);
5632 %}
5633
5634 // Double register operands
5635 operand regD() %{
5636 constraint(ALLOC_IN_RC(double_reg));
5637 match(RegD);
5638
5639 format %{ %}
5640 interface(REG_INTER);
5641 %}
5642
5643 // Double register operands
5644 operand legRegD() %{
5645 constraint(ALLOC_IN_RC(double_reg_legacy));
5646 match(RegD);
5647
5648 format %{ %}
5649 interface(REG_INTER);
5650 %}
5651
5652 // Double register operands
5653 operand vlRegD() %{
5654 constraint(ALLOC_IN_RC(double_reg_vl));
5655 match(RegD);
5656
5657 format %{ %}
5658 interface(REG_INTER);
5659 %}
5660
5661 //----------Memory Operands----------------------------------------------------
5662 // Direct Memory Operand
5663 // operand direct(immP addr)
5664 // %{
5665 // match(addr);
5666
5667 // format %{ "[$addr]" %}
5668 // interface(MEMORY_INTER) %{
5669 // base(0xFFFFFFFF);
5670 // index(0x4);
5671 // scale(0x0);
5672 // disp($addr);
5673 // %}
5674 // %}
5675
5676 // Indirect Memory Operand
5677 operand indirect(any_RegP reg)
5678 %{
5679 constraint(ALLOC_IN_RC(ptr_reg));
5680 match(reg);
5681
5682 format %{ "[$reg]" %}
5683 interface(MEMORY_INTER) %{
5684 base($reg);
5685 index(0x4);
5686 scale(0x0);
5687 disp(0x0);
5688 %}
5689 %}
5690
5691 // Indirect Memory Plus Short Offset Operand
5692 operand indOffset8(any_RegP reg, immL8 off)
5693 %{
5694 constraint(ALLOC_IN_RC(ptr_reg));
5695 match(AddP reg off);
5696
5697 format %{ "[$reg + $off (8-bit)]" %}
5698 interface(MEMORY_INTER) %{
5699 base($reg);
5700 index(0x4);
5701 scale(0x0);
5702 disp($off);
5703 %}
5704 %}
5705
5706 // Indirect Memory Plus Long Offset Operand
5707 operand indOffset32(any_RegP reg, immL32 off)
5708 %{
5709 constraint(ALLOC_IN_RC(ptr_reg));
5710 match(AddP reg off);
5711
5712 format %{ "[$reg + $off (32-bit)]" %}
5713 interface(MEMORY_INTER) %{
5714 base($reg);
5715 index(0x4);
5716 scale(0x0);
5717 disp($off);
5718 %}
5719 %}
5720
5721 // Indirect Memory Plus Index Register Plus Offset Operand
5722 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5723 %{
5724 constraint(ALLOC_IN_RC(ptr_reg));
5725 match(AddP (AddP reg lreg) off);
5726
5727 op_cost(10);
5728 format %{"[$reg + $off + $lreg]" %}
5729 interface(MEMORY_INTER) %{
5730 base($reg);
5731 index($lreg);
5732 scale(0x0);
5733 disp($off);
5734 %}
5735 %}
5736
5737 // Indirect Memory Plus Index Register Plus Offset Operand
5738 operand indIndex(any_RegP reg, rRegL lreg)
5739 %{
5740 constraint(ALLOC_IN_RC(ptr_reg));
5741 match(AddP reg lreg);
5742
5743 op_cost(10);
5744 format %{"[$reg + $lreg]" %}
5745 interface(MEMORY_INTER) %{
5746 base($reg);
5747 index($lreg);
5748 scale(0x0);
5749 disp(0x0);
5750 %}
5751 %}
5752
5753 // Indirect Memory Times Scale Plus Index Register
5754 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5755 %{
5756 constraint(ALLOC_IN_RC(ptr_reg));
5757 match(AddP reg (LShiftL lreg scale));
5758
5759 op_cost(10);
5760 format %{"[$reg + $lreg << $scale]" %}
5761 interface(MEMORY_INTER) %{
5762 base($reg);
5763 index($lreg);
5764 scale($scale);
5765 disp(0x0);
5766 %}
5767 %}
5768
5769 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5770 %{
5771 constraint(ALLOC_IN_RC(ptr_reg));
5772 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5773 match(AddP reg (LShiftL (ConvI2L idx) scale));
5774
5775 op_cost(10);
5776 format %{"[$reg + pos $idx << $scale]" %}
5777 interface(MEMORY_INTER) %{
5778 base($reg);
5779 index($idx);
5780 scale($scale);
5781 disp(0x0);
5782 %}
5783 %}
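// The predicate above requires the int index to be known non-negative: only
// then is using the 32-bit value directly as a 64-bit index equivalent to the
// sign extension implied by the matched ConvI2L.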
5784
5785 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5786 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5787 %{
5788 constraint(ALLOC_IN_RC(ptr_reg));
5789 match(AddP (AddP reg (LShiftL lreg scale)) off);
5790
5791 op_cost(10);
5792 format %{"[$reg + $off + $lreg << $scale]" %}
5793 interface(MEMORY_INTER) %{
5794 base($reg);
5795 index($lreg);
5796 scale($scale);
5797 disp($off);
5798 %}
5799 %}
5800
5801 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5802 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5803 %{
5804 constraint(ALLOC_IN_RC(ptr_reg));
5805 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5806 match(AddP (AddP reg (ConvI2L idx)) off);
5807
5808 op_cost(10);
5809 format %{"[$reg + $off + $idx]" %}
5810 interface(MEMORY_INTER) %{
5811 base($reg);
5812 index($idx);
5813 scale(0x0);
5814 disp($off);
5815 %}
5816 %}
5817
5818 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5819 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5820 %{
5821 constraint(ALLOC_IN_RC(ptr_reg));
5822 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5823 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5824
5825 op_cost(10);
5826 format %{"[$reg + $off + $idx << $scale]" %}
5827 interface(MEMORY_INTER) %{
5828 base($reg);
5829 index($idx);
5830 scale($scale);
5831 disp($off);
5832 %}
5833 %}
5834
5835 // Indirect Narrow Oop Operand
5836 operand indCompressedOop(rRegN reg) %{
5837 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5838 constraint(ALLOC_IN_RC(ptr_reg));
5839 match(DecodeN reg);
5840
5841 op_cost(10);
5842 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5843 interface(MEMORY_INTER) %{
5844 base(0xc); // R12
5845 index($reg);
5846 scale(0x3);
5847 disp(0x0);
5848 %}
5849 %}
5850
5851 // Indirect Narrow Oop Plus Offset Operand
5852 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
5853 // so we can't free r12 even with CompressedOops::base() == nullptr.
5854 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5855 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5856 constraint(ALLOC_IN_RC(ptr_reg));
5857 match(AddP (DecodeN reg) off);
5858
5859 op_cost(10);
5860 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5861 interface(MEMORY_INTER) %{
5862 base(0xc); // R12
5863 index($reg);
5864 scale(0x3);
5865 disp($off);
5866 %}
5867 %}
5868
5869 // Indirect Memory Operand
5870 operand indirectNarrow(rRegN reg)
5871 %{
5872 predicate(CompressedOops::shift() == 0);
5873 constraint(ALLOC_IN_RC(ptr_reg));
5874 match(DecodeN reg);
5875
5876 format %{ "[$reg]" %}
5877 interface(MEMORY_INTER) %{
5878 base($reg);
5879 index(0x4);
5880 scale(0x0);
5881 disp(0x0);
5882 %}
5883 %}
5884
5885 // Indirect Memory Plus Short Offset Operand
5886 operand indOffset8Narrow(rRegN reg, immL8 off)
5887 %{
5888 predicate(CompressedOops::shift() == 0);
5889 constraint(ALLOC_IN_RC(ptr_reg));
5890 match(AddP (DecodeN reg) off);
5891
5892 format %{ "[$reg + $off (8-bit)]" %}
5893 interface(MEMORY_INTER) %{
5894 base($reg);
5895 index(0x4);
5896 scale(0x0);
5897 disp($off);
5898 %}
5899 %}
5900
5901 // Indirect Memory Plus Long Offset Operand
5902 operand indOffset32Narrow(rRegN reg, immL32 off)
5903 %{
5904 predicate(CompressedOops::shift() == 0);
5905 constraint(ALLOC_IN_RC(ptr_reg));
5906 match(AddP (DecodeN reg) off);
5907
5908 format %{ "[$reg + $off (32-bit)]" %}
5909 interface(MEMORY_INTER) %{
5910 base($reg);
5911 index(0x4);
5912 scale(0x0);
5913 disp($off);
5914 %}
5915 %}
5916
5917 // Indirect Memory Plus Index Register Plus Offset Operand
5918 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5919 %{
5920 predicate(CompressedOops::shift() == 0);
5921 constraint(ALLOC_IN_RC(ptr_reg));
5922 match(AddP (AddP (DecodeN reg) lreg) off);
5923
5924 op_cost(10);
5925 format %{"[$reg + $off + $lreg]" %}
5926 interface(MEMORY_INTER) %{
5927 base($reg);
5928 index($lreg);
5929 scale(0x0);
5930 disp($off);
5931 %}
5932 %}
5933
5934 // Indirect Memory Plus Index Register Plus Offset Operand
5935 operand indIndexNarrow(rRegN reg, rRegL lreg)
5936 %{
5937 predicate(CompressedOops::shift() == 0);
5938 constraint(ALLOC_IN_RC(ptr_reg));
5939 match(AddP (DecodeN reg) lreg);
5940
5941 op_cost(10);
5942 format %{"[$reg + $lreg]" %}
5943 interface(MEMORY_INTER) %{
5944 base($reg);
5945 index($lreg);
5946 scale(0x0);
5947 disp(0x0);
5948 %}
5949 %}
5950
5951 // Indirect Memory Times Scale Plus Index Register
5952 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5953 %{
5954 predicate(CompressedOops::shift() == 0);
5955 constraint(ALLOC_IN_RC(ptr_reg));
5956 match(AddP (DecodeN reg) (LShiftL lreg scale));
5957
5958 op_cost(10);
5959 format %{"[$reg + $lreg << $scale]" %}
5960 interface(MEMORY_INTER) %{
5961 base($reg);
5962 index($lreg);
5963 scale($scale);
5964 disp(0x0);
5965 %}
5966 %}
5967
5968 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5969 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5970 %{
5971 predicate(CompressedOops::shift() == 0);
5972 constraint(ALLOC_IN_RC(ptr_reg));
5973 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5974
5975 op_cost(10);
5976 format %{"[$reg + $off + $lreg << $scale]" %}
5977 interface(MEMORY_INTER) %{
5978 base($reg);
5979 index($lreg);
5980 scale($scale);
5981 disp($off);
5982 %}
5983 %}
5984
5985 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5986 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5987 %{
5988 constraint(ALLOC_IN_RC(ptr_reg));
5989 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5990 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5991
5992 op_cost(10);
5993 format %{"[$reg + $off + $idx]" %}
5994 interface(MEMORY_INTER) %{
5995 base($reg);
5996 index($idx);
5997 scale(0x0);
5998 disp($off);
5999 %}
6000 %}
6001
6002 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
6003 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
6004 %{
6005 constraint(ALLOC_IN_RC(ptr_reg));
6006 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
6007 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
6008
6009 op_cost(10);
6010 format %{"[$reg + $off + $idx << $scale]" %}
6011 interface(MEMORY_INTER) %{
6012 base($reg);
6013 index($idx);
6014 scale($scale);
6015 disp($off);
6016 %}
6017 %}
6018
6019 //----------Special Memory Operands--------------------------------------------
6020 // Stack Slot Operand - This operand is used for loading and storing temporary
6021 // values on the stack where a match requires a value to
6022 // flow through memory.
6023 operand stackSlotP(sRegP reg)
6024 %{
6025 constraint(ALLOC_IN_RC(stack_slots));
6026 // No match rule because this operand is only generated in matching
6027
6028 format %{ "[$reg]" %}
6029 interface(MEMORY_INTER) %{
6030 base(0x4); // RSP
6031 index(0x4); // No Index
6032 scale(0x0); // No Scale
6033 disp($reg); // Stack Offset
6034 %}
6035 %}
6036
6037 operand stackSlotI(sRegI reg)
6038 %{
6039 constraint(ALLOC_IN_RC(stack_slots));
6040 // No match rule because this operand is only generated in matching
6041
6042 format %{ "[$reg]" %}
6043 interface(MEMORY_INTER) %{
6044 base(0x4); // RSP
6045 index(0x4); // No Index
6046 scale(0x0); // No Scale
6047 disp($reg); // Stack Offset
6048 %}
6049 %}
6050
6051 operand stackSlotF(sRegF reg)
6052 %{
6053 constraint(ALLOC_IN_RC(stack_slots));
6054 // No match rule because this operand is only generated in matching
6055
6056 format %{ "[$reg]" %}
6057 interface(MEMORY_INTER) %{
6058 base(0x4); // RSP
6059 index(0x4); // No Index
6060 scale(0x0); // No Scale
6061 disp($reg); // Stack Offset
6062 %}
6063 %}
6064
6065 operand stackSlotD(sRegD reg)
6066 %{
6067 constraint(ALLOC_IN_RC(stack_slots));
6068 // No match rule because this operand is only generated in matching
6069
6070 format %{ "[$reg]" %}
6071 interface(MEMORY_INTER) %{
6072 base(0x4); // RSP
6073 index(0x4); // No Index
6074 scale(0x0); // No Scale
6075 disp($reg); // Stack Offset
6076 %}
6077 %}
6078 operand stackSlotL(sRegL reg)
6079 %{
6080 constraint(ALLOC_IN_RC(stack_slots));
6081 // No match rule because this operand is only generated in matching
6082
6083 format %{ "[$reg]" %}
6084 interface(MEMORY_INTER) %{
6085 base(0x4); // RSP
6086 index(0x4); // No Index
6087 scale(0x0); // No Scale
6088 disp($reg); // Stack Offset
6089 %}
6090 %}
6091
6092 //----------Conditional Branch Operands----------------------------------------
6093 // Comparison Op - This is the operation of the comparison, and is limited to
6094 // the following set of codes:
6095 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6096 //
6097 // Other attributes of the comparison, such as unsignedness, are specified
6098 // by the comparison instruction that sets a condition code flags register.
6099 // That result is represented by a flags operand whose subtype is appropriate
6100 // to the unsignedness (etc.) of the comparison.
6101 //
6102 // Later, the instruction which matches both the Comparison Op (a Bool) and
6103 // the flags (produced by the Cmp) specifies the coding of the comparison op
6104 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6105
6106 // Comparison Code
6107 operand cmpOp()
6108 %{
6109 match(Bool);
6110
6111 format %{ "" %}
6112 interface(COND_INTER) %{
6113 equal(0x4, "e");
6114 not_equal(0x5, "ne");
6115 less(0xc, "l");
6116 greater_equal(0xd, "ge");
6117 less_equal(0xe, "le");
6118 greater(0xf, "g");
6119 overflow(0x0, "o");
6120 no_overflow(0x1, "no");
6121 %}
6122 %}
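// Note: the hexadecimal values above are the standard x86 condition-code
// encodings (the low four bits of the Jcc/SETcc/CMOVcc opcodes); e.g. 0x4
// selects "e" (ZF set) and 0x5 selects "ne" (ZF clear).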
6123
6124 // Comparison Code, unsigned compare. Used by FP also, with
6125 // C2 (unordered) turned into GT or LT already. The other bits
6126 // C0 and C3 are turned into Carry & Zero flags.
6127 operand cmpOpU()
6128 %{
6129 match(Bool);
6130
6131 format %{ "" %}
6132 interface(COND_INTER) %{
6133 equal(0x4, "e");
6134 not_equal(0x5, "ne");
6135 less(0x2, "b");
6136 greater_equal(0x3, "ae");
6137 less_equal(0x6, "be");
6138 greater(0x7, "a");
6139 overflow(0x0, "o");
6140 no_overflow(0x1, "no");
6141 %}
6142 %}
6143
6144
6145 // Floating comparisons that don't require any fixup for the unordered case.
6146 // If both inputs of the comparison are the same, ZF is always set, so we
6147 // don't need to use cmpOpUCF2 for eq/ne.
6148 operand cmpOpUCF() %{
6149 match(Bool);
6150 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6151 (n->as_Bool()->_test._test == BoolTest::lt ||
6152 n->as_Bool()->_test._test == BoolTest::ge ||
6153 n->as_Bool()->_test._test == BoolTest::le ||
6154 n->as_Bool()->_test._test == BoolTest::gt ||
6155 n->in(1)->in(1) == n->in(1)->in(2)));
6156 format %{ "" %}
6157 interface(COND_INTER) %{
6158 equal(0xb, "np");
6159 not_equal(0xa, "p");
6160 less(0x2, "b");
6161 greater_equal(0x3, "ae");
6162 less_equal(0x6, "be");
6163 greater(0x7, "a");
6164 overflow(0x0, "o");
6165 no_overflow(0x1, "no");
6166 %}
6167 %}
6168
6169
6170 // Floating comparisons that can be fixed up with extra conditional jumps
6171 operand cmpOpUCF2() %{
6172 match(Bool);
6173 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6174 (n->as_Bool()->_test._test == BoolTest::ne ||
6175 n->as_Bool()->_test._test == BoolTest::eq) &&
6176 n->in(1)->in(1) != n->in(1)->in(2));
6177 format %{ "" %}
6178 interface(COND_INTER) %{
6179 equal(0x4, "e");
6180 not_equal(0x5, "ne");
6181 less(0x2, "b");
6182 greater_equal(0x3, "ae");
6183 less_equal(0x6, "be");
6184 greater(0x7, "a");
6185 overflow(0x0, "o");
6186 no_overflow(0x1, "no");
6187 %}
6188 %}
6189
6190
6191 // Floating point comparisons that set condition flags to test more directly.
6192 // Unsigned tests are used for G (>) and GE (>=) conditions, while signed tests
6193 // are used for L (<) and LE (<=) conditions. It's important to convert these
6194 // latter conditions to ones that use unsigned tests before passing into an
6195 // instruction because the preceding comparison might be based on a three way
6196 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6197 operand cmpOpUCFE()
6198 %{
6199 match(Bool);
6200 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6201 (n->as_Bool()->_test._test == BoolTest::ne ||
6202 n->as_Bool()->_test._test == BoolTest::eq ||
6203 n->as_Bool()->_test._test == BoolTest::lt ||
6204 n->as_Bool()->_test._test == BoolTest::ge ||
6205 n->as_Bool()->_test._test == BoolTest::le ||
6206 n->as_Bool()->_test._test == BoolTest::gt));
6207
6208 format %{ "" %}
6209 interface(COND_INTER) %{
6210 equal(0x4, "e");
6211 not_equal(0x5, "ne");
6212 less(0x2, "b");
6213 greater_equal(0x3, "ae");
6214 less_equal(0x6, "be");
6215 greater(0x7, "a");
6216 overflow(0x0, "o");
6217 no_overflow(0x1, "no");
6218 %}
6219 %}
6220
6221 // Operands for bound floating point register arguments
6222 operand rxmm0() %{
6223 constraint(ALLOC_IN_RC(xmm0_reg));
6224 match(VecX);
6225 format%{%}
6226 interface(REG_INTER);
6227 %}
6228
6229 // Vectors
6230
6231 // Dummy generic vector class. Should be used for all vector operands.
6232 // Replaced with vec[SDXYZ] during post-selection pass.
6233 operand vec() %{
6234 constraint(ALLOC_IN_RC(dynamic));
6235 match(VecX);
6236 match(VecY);
6237 match(VecZ);
6238 match(VecS);
6239 match(VecD);
6240
6241 format %{ %}
6242 interface(REG_INTER);
6243 %}
6244
6245 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6246 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6247 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6248 // runtime code generation via reg_class_dynamic.
6249 operand legVec() %{
6250 constraint(ALLOC_IN_RC(dynamic));
6251 match(VecX);
6252 match(VecY);
6253 match(VecZ);
6254 match(VecS);
6255 match(VecD);
6256
6257 format %{ %}
6258 interface(REG_INTER);
6259 %}
6260
6261 // Replaces vec during post-selection cleanup. See above.
6262 operand vecS() %{
6263 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6264 match(VecS);
6265
6266 format %{ %}
6267 interface(REG_INTER);
6268 %}
6269
6270 // Replaces legVec during post-selection cleanup. See above.
6271 operand legVecS() %{
6272 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6273 match(VecS);
6274
6275 format %{ %}
6276 interface(REG_INTER);
6277 %}
6278
6279 // Replaces vec during post-selection cleanup. See above.
6280 operand vecD() %{
6281 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6282 match(VecD);
6283
6284 format %{ %}
6285 interface(REG_INTER);
6286 %}
6287
6288 // Replaces legVec during post-selection cleanup. See above.
6289 operand legVecD() %{
6290 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6291 match(VecD);
6292
6293 format %{ %}
6294 interface(REG_INTER);
6295 %}
6296
6297 // Replaces vec during post-selection cleanup. See above.
6298 operand vecX() %{
6299 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6300 match(VecX);
6301
6302 format %{ %}
6303 interface(REG_INTER);
6304 %}
6305
6306 // Replaces legVec during post-selection cleanup. See above.
6307 operand legVecX() %{
6308 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6309 match(VecX);
6310
6311 format %{ %}
6312 interface(REG_INTER);
6313 %}
6314
6315 // Replaces vec during post-selection cleanup. See above.
6316 operand vecY() %{
6317 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6318 match(VecY);
6319
6320 format %{ %}
6321 interface(REG_INTER);
6322 %}
6323
6324 // Replaces legVec during post-selection cleanup. See above.
6325 operand legVecY() %{
6326 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6327 match(VecY);
6328
6329 format %{ %}
6330 interface(REG_INTER);
6331 %}
6332
6333 // Replaces vec during post-selection cleanup. See above.
6334 operand vecZ() %{
6335 constraint(ALLOC_IN_RC(vectorz_reg));
6336 match(VecZ);
6337
6338 format %{ %}
6339 interface(REG_INTER);
6340 %}
6341
6342 // Replaces legVec during post-selection cleanup. See above.
6343 operand legVecZ() %{
6344 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6345 match(VecZ);
6346
6347 format %{ %}
6348 interface(REG_INTER);
6349 %}
6350
6351 //----------OPERAND CLASSES----------------------------------------------------
6352 // Operand Classes are groups of operands that are used to simplify
6353 // instruction definitions by not requiring the AD writer to specify separate
6354 // instructions for every form of operand when the instruction accepts
6355 // multiple operand types with the same basic encoding and format. The classic
6356 // case of this is memory operands.
6357
6358 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6359 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6360 indCompressedOop, indCompressedOopOffset,
6361 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6362 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6363 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6364
6365 //----------PIPELINE-----------------------------------------------------------
6366 // Rules which define the behavior of the target architectures pipeline.
6367 pipeline %{
6368
6369 //----------ATTRIBUTES---------------------------------------------------------
6370 attributes %{
6371 variable_size_instructions; // Instructions are variable-sized
6372 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6373 instruction_unit_size = 1; // An instruction is 1 byte long
6374 instruction_fetch_unit_size = 16; // The processor fetches one line
6375 instruction_fetch_units = 1; // of 16 bytes
6376 %}
6377
6378 //----------RESOURCES----------------------------------------------------------
6379 // Resources are the functional units available to the machine
6380
6381 // Generic P2/P3 pipeline
6382 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6383 // 3 instructions decoded per cycle.
6384 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6385 // 3 ALU op, only ALU0 handles mul instructions.
6386 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6387 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6388 BR, FPU,
6389 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6390
6391 //----------PIPELINE DESCRIPTION-----------------------------------------------
6392 // Pipeline Description specifies the stages in the machine's pipeline
6393
6394 // Generic P2/P3 pipeline
6395 pipe_desc(S0, S1, S2, S3, S4, S5);
6396
6397 //----------PIPELINE CLASSES---------------------------------------------------
6398 // Pipeline Classes describe the stages in which input and output are
6399 // referenced by the hardware pipeline.
6400
6401 // Naming convention: ialu or fpu
6402 // Then: _reg
6403 // Then: _reg if there is a 2nd register
6404 // Then: _long if it's a pair of instructions implementing a long
6405 // Then: _fat if it requires the big decoder
6406 // Or: _mem if it requires the big decoder and a memory unit.
6407
6408 // Integer ALU reg operation
6409 pipe_class ialu_reg(rRegI dst)
6410 %{
6411 single_instruction;
6412 dst : S4(write);
6413 dst : S3(read);
6414 DECODE : S0; // any decoder
6415 ALU : S3; // any alu
6416 %}
6417
6418 // Long ALU reg operation
6419 pipe_class ialu_reg_long(rRegL dst)
6420 %{
6421 instruction_count(2);
6422 dst : S4(write);
6423 dst : S3(read);
6424 DECODE : S0(2); // any 2 decoders
6425 ALU : S3(2); // both alus
6426 %}
6427
6428 // Integer ALU reg operation using big decoder
6429 pipe_class ialu_reg_fat(rRegI dst)
6430 %{
6431 single_instruction;
6432 dst : S4(write);
6433 dst : S3(read);
6434 D0 : S0; // big decoder only
6435 ALU : S3; // any alu
6436 %}
6437
6438 // Integer ALU reg-reg operation
6439 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6440 %{
6441 single_instruction;
6442 dst : S4(write);
6443 src : S3(read);
6444 DECODE : S0; // any decoder
6445 ALU : S3; // any alu
6446 %}
6447
6448 // Integer ALU reg-reg operation
6449 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6450 %{
6451 single_instruction;
6452 dst : S4(write);
6453 src : S3(read);
6454 D0 : S0; // big decoder only
6455 ALU : S3; // any alu
6456 %}
6457
6458 // Integer ALU reg-mem operation
6459 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6460 %{
6461 single_instruction;
6462 dst : S5(write);
6463 mem : S3(read);
6464 D0 : S0; // big decoder only
6465 ALU : S4; // any alu
6466 MEM : S3; // any mem
6467 %}
6468
6469 // Integer mem operation (prefetch)
6470 pipe_class ialu_mem(memory mem)
6471 %{
6472 single_instruction;
6473 mem : S3(read);
6474 D0 : S0; // big decoder only
6475 MEM : S3; // any mem
6476 %}
6477
6478 // Integer Store to Memory
6479 pipe_class ialu_mem_reg(memory mem, rRegI src)
6480 %{
6481 single_instruction;
6482 mem : S3(read);
6483 src : S5(read);
6484 D0 : S0; // big decoder only
6485 ALU : S4; // any alu
6486 MEM : S3;
6487 %}
6488
6489 // // Long Store to Memory
6490 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6491 // %{
6492 // instruction_count(2);
6493 // mem : S3(read);
6494 // src : S5(read);
6495 // D0 : S0(2); // big decoder only; twice
6496 // ALU : S4(2); // any 2 alus
6497 // MEM : S3(2); // Both mems
6498 // %}
6499
6500 // Integer Store to Memory
6501 pipe_class ialu_mem_imm(memory mem)
6502 %{
6503 single_instruction;
6504 mem : S3(read);
6505 D0 : S0; // big decoder only
6506 ALU : S4; // any alu
6507 MEM : S3;
6508 %}
6509
6510 // Integer ALU0 reg-reg operation
6511 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6512 %{
6513 single_instruction;
6514 dst : S4(write);
6515 src : S3(read);
6516 D0 : S0; // Big decoder only
6517 ALU0 : S3; // only alu0
6518 %}
6519
6520 // Integer ALU0 reg-mem operation
6521 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6522 %{
6523 single_instruction;
6524 dst : S5(write);
6525 mem : S3(read);
6526 D0 : S0; // big decoder only
6527 ALU0 : S4; // ALU0 only
6528 MEM : S3; // any mem
6529 %}
6530
6531 // Integer ALU reg-reg operation
6532 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6533 %{
6534 single_instruction;
6535 cr : S4(write);
6536 src1 : S3(read);
6537 src2 : S3(read);
6538 DECODE : S0; // any decoder
6539 ALU : S3; // any alu
6540 %}
6541
6542 // Integer ALU reg-imm operation
6543 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6544 %{
6545 single_instruction;
6546 cr : S4(write);
6547 src1 : S3(read);
6548 DECODE : S0; // any decoder
6549 ALU : S3; // any alu
6550 %}
6551
6552 // Integer ALU reg-mem operation
6553 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6554 %{
6555 single_instruction;
6556 cr : S4(write);
6557 src1 : S3(read);
6558 src2 : S3(read);
6559 D0 : S0; // big decoder only
6560 ALU : S4; // any alu
6561 MEM : S3;
6562 %}
6563
6564 // Conditional move reg-reg
6565 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6566 %{
6567 instruction_count(4);
6568 y : S4(read);
6569 q : S3(read);
6570 p : S3(read);
6571 DECODE : S0(4); // any decoder
6572 %}
6573
6574 // Conditional move reg-reg
6575 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6576 %{
6577 single_instruction;
6578 dst : S4(write);
6579 src : S3(read);
6580 cr : S3(read);
6581 DECODE : S0; // any decoder
6582 %}
6583
6584 // Conditional move reg-mem
6585 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6586 %{
6587 single_instruction;
6588 dst : S4(write);
6589 src : S3(read);
6590 cr : S3(read);
6591 DECODE : S0; // any decoder
6592 MEM : S3;
6593 %}
6594
6595 // Conditional move reg-reg long
6596 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6597 %{
6598 single_instruction;
6599 dst : S4(write);
6600 src : S3(read);
6601 cr : S3(read);
6602 DECODE : S0(2); // any 2 decoders
6603 %}
6604
// Float reg operation
6606 pipe_class fpu_reg(regD dst)
6607 %{
6608 instruction_count(2);
6609 dst : S3(read);
6610 DECODE : S0(2); // any 2 decoders
6611 FPU : S3;
6612 %}
6613
6614 // Float reg-reg operation
6615 pipe_class fpu_reg_reg(regD dst, regD src)
6616 %{
6617 instruction_count(2);
6618 dst : S4(write);
6619 src : S3(read);
6620 DECODE : S0(2); // any 2 decoders
6621 FPU : S3;
6622 %}
6623
// Float reg-reg-reg operation
6625 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6626 %{
6627 instruction_count(3);
6628 dst : S4(write);
6629 src1 : S3(read);
6630 src2 : S3(read);
6631 DECODE : S0(3); // any 3 decoders
6632 FPU : S3(2);
6633 %}
6634
// Float reg-reg-reg-reg operation
6636 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6637 %{
6638 instruction_count(4);
6639 dst : S4(write);
6640 src1 : S3(read);
6641 src2 : S3(read);
6642 src3 : S3(read);
  DECODE : S0(4); // any 4 decoders
6644 FPU : S3(2);
6645 %}
6646
// Float reg-mem-reg-reg operation
6648 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6649 %{
6650 instruction_count(4);
6651 dst : S4(write);
6652 src1 : S3(read);
6653 src2 : S3(read);
6654 src3 : S3(read);
6655 DECODE : S1(3); // any 3 decoders
6656 D0 : S0; // Big decoder only
6657 FPU : S3(2);
6658 MEM : S3;
6659 %}
6660
6661 // Float reg-mem operation
6662 pipe_class fpu_reg_mem(regD dst, memory mem)
6663 %{
6664 instruction_count(2);
6665 dst : S5(write);
6666 mem : S3(read);
6667 D0 : S0; // big decoder only
6668 DECODE : S1; // any decoder for FPU POP
6669 FPU : S4;
6670 MEM : S3; // any mem
6671 %}
6672
// Float reg-reg-mem operation
6674 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6675 %{
6676 instruction_count(3);
6677 dst : S5(write);
6678 src1 : S3(read);
6679 mem : S3(read);
6680 D0 : S0; // big decoder only
6681 DECODE : S1(2); // any decoder for FPU POP
6682 FPU : S4;
6683 MEM : S3; // any mem
6684 %}
6685
6686 // Float mem-reg operation
6687 pipe_class fpu_mem_reg(memory mem, regD src)
6688 %{
6689 instruction_count(2);
6690 src : S5(read);
6691 mem : S3(read);
6692 DECODE : S0; // any decoder for FPU PUSH
6693 D0 : S1; // big decoder only
6694 FPU : S4;
6695 MEM : S3; // any mem
6696 %}
6697
6698 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6699 %{
6700 instruction_count(3);
6701 src1 : S3(read);
6702 src2 : S3(read);
6703 mem : S3(read);
6704 DECODE : S0(2); // any decoder for FPU PUSH
6705 D0 : S1; // big decoder only
6706 FPU : S4;
6707 MEM : S3; // any mem
6708 %}
6709
6710 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6711 %{
6712 instruction_count(3);
6713 src1 : S3(read);
6714 src2 : S3(read);
6715 mem : S4(read);
6716 DECODE : S0; // any decoder for FPU PUSH
6717 D0 : S0(2); // big decoder only
6718 FPU : S4;
6719 MEM : S3(2); // any mem
6720 %}
6721
6722 pipe_class fpu_mem_mem(memory dst, memory src1)
6723 %{
6724 instruction_count(2);
6725 src1 : S3(read);
6726 dst : S4(read);
6727 D0 : S0(2); // big decoder only
6728 MEM : S3(2); // any mem
6729 %}
6730
6731 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6732 %{
6733 instruction_count(3);
6734 src1 : S3(read);
6735 src2 : S3(read);
6736 dst : S4(read);
6737 D0 : S0(3); // big decoder only
6738 FPU : S4;
6739 MEM : S3(3); // any mem
6740 %}
6741
6742 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6743 %{
6744 instruction_count(3);
6745 src1 : S4(read);
6746 mem : S4(read);
6747 DECODE : S0; // any decoder for FPU PUSH
6748 D0 : S0(2); // big decoder only
6749 FPU : S4;
6750 MEM : S3(2); // any mem
6751 %}
6752
6753 // Float load constant
6754 pipe_class fpu_reg_con(regD dst)
6755 %{
6756 instruction_count(2);
6757 dst : S5(write);
6758 D0 : S0; // big decoder only for the load
6759 DECODE : S1; // any decoder for FPU POP
6760 FPU : S4;
6761 MEM : S3; // any mem
6762 %}
6763
6764 // Float load constant
6765 pipe_class fpu_reg_reg_con(regD dst, regD src)
6766 %{
6767 instruction_count(3);
6768 dst : S5(write);
6769 src : S3(read);
6770 D0 : S0; // big decoder only for the load
6771 DECODE : S1(2); // any decoder for FPU POP
6772 FPU : S4;
6773 MEM : S3; // any mem
6774 %}
6775
// Unconditional branch
6777 pipe_class pipe_jmp(label labl)
6778 %{
6779 single_instruction;
6780 BR : S3;
6781 %}
6782
6783 // Conditional branch
6784 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6785 %{
6786 single_instruction;
6787 cr : S1(read);
6788 BR : S3;
6789 %}
6790
6791 // Allocation idiom
6792 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6793 %{
6794 instruction_count(1); force_serialization;
6795 fixed_latency(6);
6796 heap_ptr : S3(read);
6797 DECODE : S0(3);
6798 D0 : S2;
6799 MEM : S3;
6800 ALU : S3(2);
6801 dst : S5(write);
6802 BR : S5;
6803 %}
6804
6805 // Generic big/slow expanded idiom
6806 pipe_class pipe_slow()
6807 %{
6808 instruction_count(10); multiple_bundles; force_serialization;
6809 fixed_latency(100);
6810 D0 : S0(2);
6811 MEM : S3(2);
6812 %}
6813
6814 // The real do-nothing guy
6815 pipe_class empty()
6816 %{
6817 instruction_count(0);
6818 %}
6819
6820 // Define the class for the Nop node
6821 define
6822 %{
6823 MachNop = empty;
6824 %}
6825
6826 %}
6827
6828 //----------INSTRUCTIONS-------------------------------------------------------
6829 //
6830 // match -- States which machine-independent subtree may be replaced
6831 // by this instruction.
6832 // ins_cost -- The estimated cost of this instruction is used by instruction
6833 // selection to identify a minimum cost tree of machine
6834 // instructions that matches a tree of machine-independent
6835 // instructions.
6836 // format -- A string providing the disassembly for this instruction.
6837 // The value of an instruction's operand may be inserted
6838 // by referring to it with a '$' prefix.
6839 // opcode -- Three instruction opcodes may be provided. These are referred
6840 // to within an encode class as $primary, $secondary, and $tertiary
//                respectively. The primary opcode is commonly used to
6842 // indicate the type of machine instruction, while secondary
6843 // and tertiary are often used for prefix options or addressing
6844 // modes.
6845 // ins_encode -- A list of encode classes with parameters. The encode class
6846 // name must have been defined in an 'enc_class' specification
6847 // in the encode section of the architecture description.
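//
// As an illustration of how these attributes fit together, the "loadI"
// instruction defined later in this file is reproduced here in commented-out,
// annotated form:
//
//   instruct loadI(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));               // ideal subtree replaced by this instruction
//     ins_cost(125);                            // cost used by instruction selection
//     format %{ "movl    $dst, $mem\t# int" %}  // disassembly string
//     ins_encode %{
//       __ movl($dst$$Register, $mem$$Address); // emitted machine code
//     %}
//     ins_pipe(ialu_reg_mem);                   // pipeline class defined above
//   %}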
6848
6849 // ============================================================================
6850
6851 instruct ShouldNotReachHere() %{
6852 match(Halt);
6853 format %{ "stop\t# ShouldNotReachHere" %}
6854 ins_encode %{
6855 if (is_reachable()) {
6856 const char* str = __ code_string(_halt_reason);
6857 __ stop(str);
6858 }
6859 %}
6860 ins_pipe(pipe_slow);
6861 %}
6862
6863 // ============================================================================
6864
6865 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6866 // Load Float
6867 instruct MoveF2VL(vlRegF dst, regF src) %{
6868 match(Set dst src);
6869 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6870 ins_encode %{
6871 ShouldNotReachHere();
6872 %}
6873 ins_pipe( fpu_reg_reg );
6874 %}
6875
6876 // Load Float
6877 instruct MoveF2LEG(legRegF dst, regF src) %{
6878 match(Set dst src);
6879 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6880 ins_encode %{
6881 ShouldNotReachHere();
6882 %}
6883 ins_pipe( fpu_reg_reg );
6884 %}
6885
6886 // Load Float
6887 instruct MoveVL2F(regF dst, vlRegF src) %{
6888 match(Set dst src);
6889 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6890 ins_encode %{
6891 ShouldNotReachHere();
6892 %}
6893 ins_pipe( fpu_reg_reg );
6894 %}
6895
6896 // Load Float
6897 instruct MoveLEG2F(regF dst, legRegF src) %{
6898 match(Set dst src);
6899 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6900 ins_encode %{
6901 ShouldNotReachHere();
6902 %}
6903 ins_pipe( fpu_reg_reg );
6904 %}
6905
6906 // Load Double
6907 instruct MoveD2VL(vlRegD dst, regD src) %{
6908 match(Set dst src);
6909 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6910 ins_encode %{
6911 ShouldNotReachHere();
6912 %}
6913 ins_pipe( fpu_reg_reg );
6914 %}
6915
6916 // Load Double
6917 instruct MoveD2LEG(legRegD dst, regD src) %{
6918 match(Set dst src);
6919 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6920 ins_encode %{
6921 ShouldNotReachHere();
6922 %}
6923 ins_pipe( fpu_reg_reg );
6924 %}
6925
6926 // Load Double
6927 instruct MoveVL2D(regD dst, vlRegD src) %{
6928 match(Set dst src);
6929 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6930 ins_encode %{
6931 ShouldNotReachHere();
6932 %}
6933 ins_pipe( fpu_reg_reg );
6934 %}
6935
6936 // Load Double
6937 instruct MoveLEG2D(regD dst, legRegD src) %{
6938 match(Set dst src);
6939 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6940 ins_encode %{
6941 ShouldNotReachHere();
6942 %}
6943 ins_pipe( fpu_reg_reg );
6944 %}
6945
6946 //----------Load/Store/Move Instructions---------------------------------------
6947 //----------Load Instructions--------------------------------------------------
6948
6949 // Load Byte (8 bit signed)
6950 instruct loadB(rRegI dst, memory mem)
6951 %{
6952 match(Set dst (LoadB mem));
6953
6954 ins_cost(125);
6955 format %{ "movsbl $dst, $mem\t# byte" %}
6956
6957 ins_encode %{
6958 __ movsbl($dst$$Register, $mem$$Address);
6959 %}
6960
6961 ins_pipe(ialu_reg_mem);
6962 %}
6963
6964 // Load Byte (8 bit signed) into Long Register
6965 instruct loadB2L(rRegL dst, memory mem)
6966 %{
6967 match(Set dst (ConvI2L (LoadB mem)));
6968
6969 ins_cost(125);
6970 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6971
6972 ins_encode %{
6973 __ movsbq($dst$$Register, $mem$$Address);
6974 %}
6975
6976 ins_pipe(ialu_reg_mem);
6977 %}
6978
6979 // Load Unsigned Byte (8 bit UNsigned)
6980 instruct loadUB(rRegI dst, memory mem)
6981 %{
6982 match(Set dst (LoadUB mem));
6983
6984 ins_cost(125);
6985 format %{ "movzbl $dst, $mem\t# ubyte" %}
6986
6987 ins_encode %{
6988 __ movzbl($dst$$Register, $mem$$Address);
6989 %}
6990
6991 ins_pipe(ialu_reg_mem);
6992 %}
6993
6994 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6995 instruct loadUB2L(rRegL dst, memory mem)
6996 %{
6997 match(Set dst (ConvI2L (LoadUB mem)));
6998
6999 ins_cost(125);
7000 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
7001
7002 ins_encode %{
7003 __ movzbq($dst$$Register, $mem$$Address);
7004 %}
7005
7006 ins_pipe(ialu_reg_mem);
7007 %}
7008
7009 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
7010 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7011 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
7012 effect(KILL cr);
7013
7014 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
7015 "andl $dst, right_n_bits($mask, 8)" %}
7016 ins_encode %{
7017 Register Rdst = $dst$$Register;
7018 __ movzbq(Rdst, $mem$$Address);
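    // movzbq already zero-extended the byte, so only the low 8 bits of the mask matter.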
7019 __ andl(Rdst, $mask$$constant & right_n_bits(8));
7020 %}
7021 ins_pipe(ialu_reg_mem);
7022 %}
7023
7024 // Load Short (16 bit signed)
7025 instruct loadS(rRegI dst, memory mem)
7026 %{
7027 match(Set dst (LoadS mem));
7028
7029 ins_cost(125);
7030 format %{ "movswl $dst, $mem\t# short" %}
7031
7032 ins_encode %{
7033 __ movswl($dst$$Register, $mem$$Address);
7034 %}
7035
7036 ins_pipe(ialu_reg_mem);
7037 %}
7038
7039 // Load Short (16 bit signed) to Byte (8 bit signed)
7040 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7041 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
7042
7043 ins_cost(125);
7044 format %{ "movsbl $dst, $mem\t# short -> byte" %}
7045 ins_encode %{
7046 __ movsbl($dst$$Register, $mem$$Address);
7047 %}
7048 ins_pipe(ialu_reg_mem);
7049 %}
7050
7051 // Load Short (16 bit signed) into Long Register
7052 instruct loadS2L(rRegL dst, memory mem)
7053 %{
7054 match(Set dst (ConvI2L (LoadS mem)));
7055
7056 ins_cost(125);
7057 format %{ "movswq $dst, $mem\t# short -> long" %}
7058
7059 ins_encode %{
7060 __ movswq($dst$$Register, $mem$$Address);
7061 %}
7062
7063 ins_pipe(ialu_reg_mem);
7064 %}
7065
7066 // Load Unsigned Short/Char (16 bit UNsigned)
7067 instruct loadUS(rRegI dst, memory mem)
7068 %{
7069 match(Set dst (LoadUS mem));
7070
7071 ins_cost(125);
7072 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7073
7074 ins_encode %{
7075 __ movzwl($dst$$Register, $mem$$Address);
7076 %}
7077
7078 ins_pipe(ialu_reg_mem);
7079 %}
7080
7081 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7082 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7083 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7084
7085 ins_cost(125);
7086 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7087 ins_encode %{
7088 __ movsbl($dst$$Register, $mem$$Address);
7089 %}
7090 ins_pipe(ialu_reg_mem);
7091 %}
7092
7093 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7094 instruct loadUS2L(rRegL dst, memory mem)
7095 %{
7096 match(Set dst (ConvI2L (LoadUS mem)));
7097
7098 ins_cost(125);
7099 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7100
7101 ins_encode %{
7102 __ movzwq($dst$$Register, $mem$$Address);
7103 %}
7104
7105 ins_pipe(ialu_reg_mem);
7106 %}
7107
7108 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7109 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7110 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7111
7112 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7113 ins_encode %{
7114 __ movzbq($dst$$Register, $mem$$Address);
7115 %}
7116 ins_pipe(ialu_reg_mem);
7117 %}
7118
7119 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7120 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7121 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7122 effect(KILL cr);
7123
7124 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7125 "andl $dst, right_n_bits($mask, 16)" %}
7126 ins_encode %{
7127 Register Rdst = $dst$$Register;
7128 __ movzwq(Rdst, $mem$$Address);
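    // movzwq already zero-extended the 16-bit value, so only the low 16 bits of the mask matter.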
7129 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7130 %}
7131 ins_pipe(ialu_reg_mem);
7132 %}
7133
7134 // Load Integer
7135 instruct loadI(rRegI dst, memory mem)
7136 %{
7137 match(Set dst (LoadI mem));
7138
7139 ins_cost(125);
7140 format %{ "movl $dst, $mem\t# int" %}
7141
7142 ins_encode %{
7143 __ movl($dst$$Register, $mem$$Address);
7144 %}
7145
7146 ins_pipe(ialu_reg_mem);
7147 %}
7148
7149 // Load Integer (32 bit signed) to Byte (8 bit signed)
7150 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7151 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7152
7153 ins_cost(125);
7154 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7155 ins_encode %{
7156 __ movsbl($dst$$Register, $mem$$Address);
7157 %}
7158 ins_pipe(ialu_reg_mem);
7159 %}
7160
7161 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7162 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7163 match(Set dst (AndI (LoadI mem) mask));
7164
7165 ins_cost(125);
7166 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7167 ins_encode %{
7168 __ movzbl($dst$$Register, $mem$$Address);
7169 %}
7170 ins_pipe(ialu_reg_mem);
7171 %}
7172
7173 // Load Integer (32 bit signed) to Short (16 bit signed)
7174 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7175 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7176
7177 ins_cost(125);
7178 format %{ "movswl $dst, $mem\t# int -> short" %}
7179 ins_encode %{
7180 __ movswl($dst$$Register, $mem$$Address);
7181 %}
7182 ins_pipe(ialu_reg_mem);
7183 %}
7184
7185 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7186 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7187 match(Set dst (AndI (LoadI mem) mask));
7188
7189 ins_cost(125);
7190 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7191 ins_encode %{
7192 __ movzwl($dst$$Register, $mem$$Address);
7193 %}
7194 ins_pipe(ialu_reg_mem);
7195 %}
7196
7197 // Load Integer into Long Register
7198 instruct loadI2L(rRegL dst, memory mem)
7199 %{
7200 match(Set dst (ConvI2L (LoadI mem)));
7201
7202 ins_cost(125);
7203 format %{ "movslq $dst, $mem\t# int -> long" %}
7204
7205 ins_encode %{
7206 __ movslq($dst$$Register, $mem$$Address);
7207 %}
7208
7209 ins_pipe(ialu_reg_mem);
7210 %}
7211
7212 // Load Integer with mask 0xFF into Long Register
7213 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7214 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7215
7216 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7217 ins_encode %{
7218 __ movzbq($dst$$Register, $mem$$Address);
7219 %}
7220 ins_pipe(ialu_reg_mem);
7221 %}
7222
7223 // Load Integer with mask 0xFFFF into Long Register
7224 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7225 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7226
7227 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7228 ins_encode %{
7229 __ movzwq($dst$$Register, $mem$$Address);
7230 %}
7231 ins_pipe(ialu_reg_mem);
7232 %}
7233
7234 // Load Integer with a 31-bit mask into Long Register
7235 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7236 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7237 effect(KILL cr);
7238
7239 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7240 "andl $dst, $mask" %}
7241 ins_encode %{
7242 Register Rdst = $dst$$Register;
7243 __ movl(Rdst, $mem$$Address);
7244 __ andl(Rdst, $mask$$constant);
7245 %}
7246 ins_pipe(ialu_reg_mem);
7247 %}
7248
7249 // Load Unsigned Integer into Long Register
7250 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7251 %{
7252 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7253
7254 ins_cost(125);
7255 format %{ "movl $dst, $mem\t# uint -> long" %}
7256
7257 ins_encode %{
7258 __ movl($dst$$Register, $mem$$Address);
7259 %}
7260
7261 ins_pipe(ialu_reg_mem);
7262 %}
7263
7264 // Load Long
7265 instruct loadL(rRegL dst, memory mem)
7266 %{
7267 match(Set dst (LoadL mem));
7268
7269 ins_cost(125);
7270 format %{ "movq $dst, $mem\t# long" %}
7271
7272 ins_encode %{
7273 __ movq($dst$$Register, $mem$$Address);
7274 %}
7275
7276 ins_pipe(ialu_reg_mem); // XXX
7277 %}
7278
7279 // Load Range
7280 instruct loadRange(rRegI dst, memory mem)
7281 %{
7282 match(Set dst (LoadRange mem));
7283
7284 ins_cost(125); // XXX
7285 format %{ "movl $dst, $mem\t# range" %}
7286 ins_encode %{
7287 __ movl($dst$$Register, $mem$$Address);
7288 %}
7289 ins_pipe(ialu_reg_mem);
7290 %}
7291
7292 // Load Pointer
7293 instruct loadP(rRegP dst, memory mem)
7294 %{
7295 match(Set dst (LoadP mem));
7296 predicate(n->as_Load()->barrier_data() == 0);
7297
7298 ins_cost(125); // XXX
7299 format %{ "movq $dst, $mem\t# ptr" %}
7300 ins_encode %{
7301 __ movq($dst$$Register, $mem$$Address);
7302 %}
7303 ins_pipe(ialu_reg_mem); // XXX
7304 %}
7305
7306 // Load Compressed Pointer
7307 instruct loadN(rRegN dst, memory mem)
7308 %{
7309 predicate(n->as_Load()->barrier_data() == 0);
7310 match(Set dst (LoadN mem));
7311
7312 ins_cost(125); // XXX
7313 format %{ "movl $dst, $mem\t# compressed ptr" %}
7314 ins_encode %{
7315 __ movl($dst$$Register, $mem$$Address);
7316 %}
7317 ins_pipe(ialu_reg_mem); // XXX
7318 %}
7319
7320
7321 // Load Klass Pointer
7322 instruct loadKlass(rRegP dst, memory mem)
7323 %{
7324 match(Set dst (LoadKlass mem));
7325
7326 ins_cost(125); // XXX
7327 format %{ "movq $dst, $mem\t# class" %}
7328 ins_encode %{
7329 __ movq($dst$$Register, $mem$$Address);
7330 %}
7331 ins_pipe(ialu_reg_mem); // XXX
7332 %}
7333
7334 // Load narrow Klass Pointer
7335 instruct loadNKlass(rRegN dst, memory mem)
7336 %{
7337 predicate(!UseCompactObjectHeaders);
7338 match(Set dst (LoadNKlass mem));
7339
7340 ins_cost(125); // XXX
7341 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7342 ins_encode %{
7343 __ movl($dst$$Register, $mem$$Address);
7344 %}
7345 ins_pipe(ialu_reg_mem); // XXX
7346 %}
7347
7348 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7349 %{
7350 predicate(UseCompactObjectHeaders);
7351 match(Set dst (LoadNKlass mem));
7352 effect(KILL cr);
7353 ins_cost(125);
7354 format %{
7355 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7356 "shrl $dst, markWord::klass_shift_at_offset"
7357 %}
7358 ins_encode %{
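    // With APX, the extended shrl form takes the memory source and the destination
    // register directly, performing the load and the shift in one instruction.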
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
7366 %}
7367 ins_pipe(ialu_reg_mem);
7368 %}
7369
7370 // Load Float
7371 instruct loadF(regF dst, memory mem)
7372 %{
7373 match(Set dst (LoadF mem));
7374
7375 ins_cost(145); // XXX
7376 format %{ "movss $dst, $mem\t# float" %}
7377 ins_encode %{
7378 __ movflt($dst$$XMMRegister, $mem$$Address);
7379 %}
7380 ins_pipe(pipe_slow); // XXX
7381 %}
7382
7383 // Load Double
7384 instruct loadD_partial(regD dst, memory mem)
7385 %{
7386 predicate(!UseXmmLoadAndClearUpper);
7387 match(Set dst (LoadD mem));
7388
7389 ins_cost(145); // XXX
7390 format %{ "movlpd $dst, $mem\t# double" %}
7391 ins_encode %{
7392 __ movdbl($dst$$XMMRegister, $mem$$Address);
7393 %}
7394 ins_pipe(pipe_slow); // XXX
7395 %}
7396
7397 instruct loadD(regD dst, memory mem)
7398 %{
7399 predicate(UseXmmLoadAndClearUpper);
7400 match(Set dst (LoadD mem));
7401
7402 ins_cost(145); // XXX
7403 format %{ "movsd $dst, $mem\t# double" %}
7404 ins_encode %{
7405 __ movdbl($dst$$XMMRegister, $mem$$Address);
7406 %}
7407 ins_pipe(pipe_slow); // XXX
7408 %}
7409
7410 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7411 %{
7412 match(Set dst con);
7413
7414 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7415
7416 ins_encode %{
7417 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7418 %}
7419
7420 ins_pipe(ialu_reg_fat);
7421 %}
7422
7423 // min = java.lang.Math.min(float a, float b)
7424 // max = java.lang.Math.max(float a, float b)
7425 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
7426 %{
7427 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7428 match(Set dst (MaxF a b));
7429 match(Set dst (MinF a b));
7430
7431 format %{ "minmaxF $dst, $a, $b" %}
7432 ins_encode %{
7433 int opcode = this->ideal_Opcode();
7434 __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7435 %}
7436 ins_pipe( pipe_slow );
7437 %}
7438
7439 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
7440 %{
7441 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7442 match(Set dst (MaxF a b));
7443 match(Set dst (MinF a b));
7444 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7445
7446 format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7447 ins_encode %{
7448 int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinF);
7450 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7451 min, fp_prec_flt /*pt*/);
7452 %}
7453 ins_pipe( pipe_slow );
7454 %}
7455
7456 // min = java.lang.Math.min(float a, float b)
7457 // max = java.lang.Math.max(float a, float b)
7458 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
7459 %{
7460 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7461 match(Set dst (MaxF a b));
7462 match(Set dst (MinF a b));
7463 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7464
7465 format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7466 ins_encode %{
7467 int opcode = this->ideal_Opcode();
7468 int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
7469 __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7470 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7471 %}
7472 ins_pipe( pipe_slow );
7473 %}
7474
7475 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
7476 %{
7477 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7478 match(Set dst (MaxF a b));
7479 match(Set dst (MinF a b));
7480 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7481
7482 format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7483 ins_encode %{
7484 int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinF);
7486 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7487 min, fp_prec_flt /*pt*/);
7488 %}
7489 ins_pipe( pipe_slow );
7490 %}
7491
7492 // min = java.lang.Math.min(double a, double b)
7493 // max = java.lang.Math.max(double a, double b)
7494 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
7495 %{
7496 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7497 match(Set dst (MaxD a b));
7498 match(Set dst (MinD a b));
7499
7500 format %{ "minmaxD $dst, $a, $b" %}
7501 ins_encode %{
7502 int opcode = this->ideal_Opcode();
7503 __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7504 %}
7505 ins_pipe( pipe_slow );
7506 %}
7507
7508 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
7509 %{
7510 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7511 match(Set dst (MaxD a b));
7512 match(Set dst (MinD a b));
7513 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7514
7515 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7516 ins_encode %{
7517 int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinD);
7519 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7520 min, fp_prec_dbl /*pt*/);
7521 %}
7522 ins_pipe( pipe_slow );
7523 %}
7524
7525 // min = java.lang.Math.min(double a, double b)
7526 // max = java.lang.Math.max(double a, double b)
7527 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
7528 %{
7529 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7530 match(Set dst (MaxD a b));
7531 match(Set dst (MinD a b));
7532 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7533
7534 format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7535 ins_encode %{
7536 int opcode = this->ideal_Opcode();
7537 int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
7538 __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7539 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7540 %}
7541 ins_pipe( pipe_slow );
7542 %}
7543
7544 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
7545 %{
7546 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7547 match(Set dst (MaxD a b));
7548 match(Set dst (MinD a b));
7549 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7550
7551 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7552 ins_encode %{
7553 int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinD);
7555 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7556 min, fp_prec_dbl /*pt*/);
7557 %}
7558 ins_pipe( pipe_slow );
7559 %}
7560
7561 // Load Effective Address
7562 instruct leaP8(rRegP dst, indOffset8 mem)
7563 %{
7564 match(Set dst mem);
7565
7566 ins_cost(110); // XXX
7567 format %{ "leaq $dst, $mem\t# ptr 8" %}
7568 ins_encode %{
7569 __ leaq($dst$$Register, $mem$$Address);
7570 %}
7571 ins_pipe(ialu_reg_reg_fat);
7572 %}
7573
7574 instruct leaP32(rRegP dst, indOffset32 mem)
7575 %{
7576 match(Set dst mem);
7577
7578 ins_cost(110);
7579 format %{ "leaq $dst, $mem\t# ptr 32" %}
7580 ins_encode %{
7581 __ leaq($dst$$Register, $mem$$Address);
7582 %}
7583 ins_pipe(ialu_reg_reg_fat);
7584 %}
7585
7586 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7587 %{
7588 match(Set dst mem);
7589
7590 ins_cost(110);
7591 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7592 ins_encode %{
7593 __ leaq($dst$$Register, $mem$$Address);
7594 %}
7595 ins_pipe(ialu_reg_reg_fat);
7596 %}
7597
7598 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7599 %{
7600 match(Set dst mem);
7601
7602 ins_cost(110);
7603 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7604 ins_encode %{
7605 __ leaq($dst$$Register, $mem$$Address);
7606 %}
7607 ins_pipe(ialu_reg_reg_fat);
7608 %}
7609
7610 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7611 %{
7612 match(Set dst mem);
7613
7614 ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
7616 ins_encode %{
7617 __ leaq($dst$$Register, $mem$$Address);
7618 %}
7619 ins_pipe(ialu_reg_reg_fat);
7620 %}
7621
7622 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7623 %{
7624 match(Set dst mem);
7625
7626 ins_cost(110);
7627 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7628 ins_encode %{
7629 __ leaq($dst$$Register, $mem$$Address);
7630 %}
7631 ins_pipe(ialu_reg_reg_fat);
7632 %}
7633
7634 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7635 %{
7636 match(Set dst mem);
7637
7638 ins_cost(110);
7639 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7640 ins_encode %{
7641 __ leaq($dst$$Register, $mem$$Address);
7642 %}
7643 ins_pipe(ialu_reg_reg_fat);
7644 %}
7645
7646 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7647 %{
7648 match(Set dst mem);
7649
7650 ins_cost(110);
7651 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7652 ins_encode %{
7653 __ leaq($dst$$Register, $mem$$Address);
7654 %}
7655 ins_pipe(ialu_reg_reg_fat);
7656 %}
7657
7658 // Load Effective Address which uses Narrow (32-bits) oop
7659 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7660 %{
7661 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7662 match(Set dst mem);
7663
7664 ins_cost(110);
7665 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7666 ins_encode %{
7667 __ leaq($dst$$Register, $mem$$Address);
7668 %}
7669 ins_pipe(ialu_reg_reg_fat);
7670 %}
7671
7672 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7673 %{
7674 predicate(CompressedOops::shift() == 0);
7675 match(Set dst mem);
7676
7677 ins_cost(110); // XXX
7678 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7679 ins_encode %{
7680 __ leaq($dst$$Register, $mem$$Address);
7681 %}
7682 ins_pipe(ialu_reg_reg_fat);
7683 %}
7684
7685 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7686 %{
7687 predicate(CompressedOops::shift() == 0);
7688 match(Set dst mem);
7689
7690 ins_cost(110);
7691 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7692 ins_encode %{
7693 __ leaq($dst$$Register, $mem$$Address);
7694 %}
7695 ins_pipe(ialu_reg_reg_fat);
7696 %}
7697
7698 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7699 %{
7700 predicate(CompressedOops::shift() == 0);
7701 match(Set dst mem);
7702
7703 ins_cost(110);
7704 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7705 ins_encode %{
7706 __ leaq($dst$$Register, $mem$$Address);
7707 %}
7708 ins_pipe(ialu_reg_reg_fat);
7709 %}
7710
7711 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7712 %{
7713 predicate(CompressedOops::shift() == 0);
7714 match(Set dst mem);
7715
7716 ins_cost(110);
7717 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7718 ins_encode %{
7719 __ leaq($dst$$Register, $mem$$Address);
7720 %}
7721 ins_pipe(ialu_reg_reg_fat);
7722 %}
7723
7724 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7725 %{
7726 predicate(CompressedOops::shift() == 0);
7727 match(Set dst mem);
7728
7729 ins_cost(110);
7730 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7731 ins_encode %{
7732 __ leaq($dst$$Register, $mem$$Address);
7733 %}
7734 ins_pipe(ialu_reg_reg_fat);
7735 %}
7736
7737 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7738 %{
7739 predicate(CompressedOops::shift() == 0);
7740 match(Set dst mem);
7741
7742 ins_cost(110);
7743 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7744 ins_encode %{
7745 __ leaq($dst$$Register, $mem$$Address);
7746 %}
7747 ins_pipe(ialu_reg_reg_fat);
7748 %}
7749
7750 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7751 %{
7752 predicate(CompressedOops::shift() == 0);
7753 match(Set dst mem);
7754
7755 ins_cost(110);
7756 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7757 ins_encode %{
7758 __ leaq($dst$$Register, $mem$$Address);
7759 %}
7760 ins_pipe(ialu_reg_reg_fat);
7761 %}
7762
7763 instruct loadConI(rRegI dst, immI src)
7764 %{
7765 match(Set dst src);
7766
7767 format %{ "movl $dst, $src\t# int" %}
7768 ins_encode %{
7769 __ movl($dst$$Register, $src$$constant);
7770 %}
7771 ins_pipe(ialu_reg_fat); // XXX
7772 %}
7773
7774 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7775 %{
7776 match(Set dst src);
7777 effect(KILL cr);
7778
7779 ins_cost(50);
7780 format %{ "xorl $dst, $dst\t# int" %}
7781 ins_encode %{
7782 __ xorl($dst$$Register, $dst$$Register);
7783 %}
7784 ins_pipe(ialu_reg);
7785 %}
7786
7787 instruct loadConL(rRegL dst, immL src)
7788 %{
7789 match(Set dst src);
7790
7791 ins_cost(150);
7792 format %{ "movq $dst, $src\t# long" %}
7793 ins_encode %{
7794 __ mov64($dst$$Register, $src$$constant);
7795 %}
7796 ins_pipe(ialu_reg);
7797 %}
7798
7799 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7800 %{
7801 match(Set dst src);
7802 effect(KILL cr);
7803
7804 ins_cost(50);
7805 format %{ "xorl $dst, $dst\t# long" %}
7806 ins_encode %{
7807 __ xorl($dst$$Register, $dst$$Register);
7808 %}
7809 ins_pipe(ialu_reg); // XXX
7810 %}
7811
7812 instruct loadConUL32(rRegL dst, immUL32 src)
7813 %{
7814 match(Set dst src);
7815
7816 ins_cost(60);
7817 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7818 ins_encode %{
7819 __ movl($dst$$Register, $src$$constant);
7820 %}
7821 ins_pipe(ialu_reg);
7822 %}
7823
7824 instruct loadConL32(rRegL dst, immL32 src)
7825 %{
7826 match(Set dst src);
7827
7828 ins_cost(70);
7829 format %{ "movq $dst, $src\t# long (32-bit)" %}
7830 ins_encode %{
7831 __ movq($dst$$Register, $src$$constant);
7832 %}
7833 ins_pipe(ialu_reg);
7834 %}
7835
7836 instruct loadConP(rRegP dst, immP con) %{
7837 match(Set dst con);
7838
7839 format %{ "movq $dst, $con\t# ptr" %}
7840 ins_encode %{
7841 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7842 %}
7843 ins_pipe(ialu_reg_fat); // XXX
7844 %}
7845
7846 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7847 %{
7848 match(Set dst src);
7849 effect(KILL cr);
7850
7851 ins_cost(50);
7852 format %{ "xorl $dst, $dst\t# ptr" %}
7853 ins_encode %{
7854 __ xorl($dst$$Register, $dst$$Register);
7855 %}
7856 ins_pipe(ialu_reg);
7857 %}
7858
7859 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7860 %{
7861 match(Set dst src);
7862 effect(KILL cr);
7863
7864 ins_cost(60);
7865 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7866 ins_encode %{
7867 __ movl($dst$$Register, $src$$constant);
7868 %}
7869 ins_pipe(ialu_reg);
7870 %}
7871
7872 instruct loadConF(regF dst, immF con) %{
7873 match(Set dst con);
7874 ins_cost(125);
7875 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7876 ins_encode %{
7877 __ movflt($dst$$XMMRegister, $constantaddress($con));
7878 %}
7879 ins_pipe(pipe_slow);
7880 %}
7881
7882 instruct loadConH(regF dst, immH con) %{
7883 match(Set dst con);
7884 ins_cost(125);
7885 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7886 ins_encode %{
7887 __ movflt($dst$$XMMRegister, $constantaddress($con));
7888 %}
7889 ins_pipe(pipe_slow);
7890 %}
7891
7892 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7893 match(Set dst src);
7894 effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7896 ins_encode %{
7897 __ xorq($dst$$Register, $dst$$Register);
7898 %}
7899 ins_pipe(ialu_reg);
7900 %}
7901
7902 instruct loadConN(rRegN dst, immN src) %{
7903 match(Set dst src);
7904
7905 ins_cost(125);
7906 format %{ "movl $dst, $src\t# compressed ptr" %}
7907 ins_encode %{
7908 address con = (address)$src$$constant;
7909 if (con == nullptr) {
7910 ShouldNotReachHere();
7911 } else {
7912 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7913 }
7914 %}
7915 ins_pipe(ialu_reg_fat); // XXX
7916 %}
7917
7918 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7919 match(Set dst src);
7920
7921 ins_cost(125);
7922 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7923 ins_encode %{
7924 address con = (address)$src$$constant;
7925 if (con == nullptr) {
7926 ShouldNotReachHere();
7927 } else {
7928 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7929 }
7930 %}
7931 ins_pipe(ialu_reg_fat); // XXX
7932 %}
7933
7934 instruct loadConF0(regF dst, immF0 src)
7935 %{
7936 match(Set dst src);
7937 ins_cost(100);
7938
7939 format %{ "xorps $dst, $dst\t# float 0.0" %}
7940 ins_encode %{
7941 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7942 %}
7943 ins_pipe(pipe_slow);
7944 %}
7945
// Use the same format since predicate() cannot be used here.
7947 instruct loadConD(regD dst, immD con) %{
7948 match(Set dst con);
7949 ins_cost(125);
7950 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7951 ins_encode %{
7952 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7953 %}
7954 ins_pipe(pipe_slow);
7955 %}
7956
7957 instruct loadConD0(regD dst, immD0 src)
7958 %{
7959 match(Set dst src);
7960 ins_cost(100);
7961
7962 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7963 ins_encode %{
7964 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7965 %}
7966 ins_pipe(pipe_slow);
7967 %}
7968
7969 instruct loadSSI(rRegI dst, stackSlotI src)
7970 %{
7971 match(Set dst src);
7972
7973 ins_cost(125);
7974 format %{ "movl $dst, $src\t# int stk" %}
7975 ins_encode %{
7976 __ movl($dst$$Register, $src$$Address);
7977 %}
7978 ins_pipe(ialu_reg_mem);
7979 %}
7980
7981 instruct loadSSL(rRegL dst, stackSlotL src)
7982 %{
7983 match(Set dst src);
7984
7985 ins_cost(125);
7986 format %{ "movq $dst, $src\t# long stk" %}
7987 ins_encode %{
7988 __ movq($dst$$Register, $src$$Address);
7989 %}
7990 ins_pipe(ialu_reg_mem);
7991 %}
7992
7993 instruct loadSSP(rRegP dst, stackSlotP src)
7994 %{
7995 match(Set dst src);
7996
7997 ins_cost(125);
7998 format %{ "movq $dst, $src\t# ptr stk" %}
7999 ins_encode %{
8000 __ movq($dst$$Register, $src$$Address);
8001 %}
8002 ins_pipe(ialu_reg_mem);
8003 %}
8004
8005 instruct loadSSF(regF dst, stackSlotF src)
8006 %{
8007 match(Set dst src);
8008
8009 ins_cost(125);
8010 format %{ "movss $dst, $src\t# float stk" %}
8011 ins_encode %{
8012 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
8013 %}
8014 ins_pipe(pipe_slow); // XXX
8015 %}
8016
// Use the same format since predicate() cannot be used here.
8018 instruct loadSSD(regD dst, stackSlotD src)
8019 %{
8020 match(Set dst src);
8021
8022 ins_cost(125);
8023 format %{ "movsd $dst, $src\t# double stk" %}
8024 ins_encode %{
8025 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
8026 %}
8027 ins_pipe(pipe_slow); // XXX
8028 %}
8029
8030 // Prefetch instructions for allocation.
8031 // Must be safe to execute with invalid address (cannot fault).
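// The variant used is chosen by the AllocatePrefetchInstr flag:
// 0 = prefetchnta, 1 = prefetcht0, 2 = prefetcht2, 3 = prefetchw.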
8032
8033 instruct prefetchAlloc( memory mem ) %{
8034 predicate(AllocatePrefetchInstr==3);
8035 match(PrefetchAllocation mem);
8036 ins_cost(125);
8037
8038 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
8039 ins_encode %{
8040 __ prefetchw($mem$$Address);
8041 %}
8042 ins_pipe(ialu_mem);
8043 %}
8044
8045 instruct prefetchAllocNTA( memory mem ) %{
8046 predicate(AllocatePrefetchInstr==0);
8047 match(PrefetchAllocation mem);
8048 ins_cost(125);
8049
8050 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
8051 ins_encode %{
8052 __ prefetchnta($mem$$Address);
8053 %}
8054 ins_pipe(ialu_mem);
8055 %}
8056
8057 instruct prefetchAllocT0( memory mem ) %{
8058 predicate(AllocatePrefetchInstr==1);
8059 match(PrefetchAllocation mem);
8060 ins_cost(125);
8061
8062 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8063 ins_encode %{
8064 __ prefetcht0($mem$$Address);
8065 %}
8066 ins_pipe(ialu_mem);
8067 %}
8068
8069 instruct prefetchAllocT2( memory mem ) %{
8070 predicate(AllocatePrefetchInstr==2);
8071 match(PrefetchAllocation mem);
8072 ins_cost(125);
8073
8074 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8075 ins_encode %{
8076 __ prefetcht2($mem$$Address);
8077 %}
8078 ins_pipe(ialu_mem);
8079 %}
8080
8081 //----------Store Instructions-------------------------------------------------
8082
8083 // Store Byte
8084 instruct storeB(memory mem, rRegI src)
8085 %{
8086 match(Set mem (StoreB mem src));
8087
8088 ins_cost(125); // XXX
8089 format %{ "movb $mem, $src\t# byte" %}
8090 ins_encode %{
8091 __ movb($mem$$Address, $src$$Register);
8092 %}
8093 ins_pipe(ialu_mem_reg);
8094 %}
8095
8096 // Store Char/Short
8097 instruct storeC(memory mem, rRegI src)
8098 %{
8099 match(Set mem (StoreC mem src));
8100
8101 ins_cost(125); // XXX
8102 format %{ "movw $mem, $src\t# char/short" %}
8103 ins_encode %{
8104 __ movw($mem$$Address, $src$$Register);
8105 %}
8106 ins_pipe(ialu_mem_reg);
8107 %}
8108
8109 // Store Integer
8110 instruct storeI(memory mem, rRegI src)
8111 %{
8112 match(Set mem (StoreI mem src));
8113
8114 ins_cost(125); // XXX
8115 format %{ "movl $mem, $src\t# int" %}
8116 ins_encode %{
8117 __ movl($mem$$Address, $src$$Register);
8118 %}
8119 ins_pipe(ialu_mem_reg);
8120 %}
8121
8122 // Store Long
8123 instruct storeL(memory mem, rRegL src)
8124 %{
8125 match(Set mem (StoreL mem src));
8126
8127 ins_cost(125); // XXX
8128 format %{ "movq $mem, $src\t# long" %}
8129 ins_encode %{
8130 __ movq($mem$$Address, $src$$Register);
8131 %}
8132 ins_pipe(ialu_mem_reg); // XXX
8133 %}
8134
8135 // Store Pointer
8136 instruct storeP(memory mem, any_RegP src)
8137 %{
8138 predicate(n->as_Store()->barrier_data() == 0);
8139 match(Set mem (StoreP mem src));
8140
8141 ins_cost(125); // XXX
8142 format %{ "movq $mem, $src\t# ptr" %}
8143 ins_encode %{
8144 __ movq($mem$$Address, $src$$Register);
8145 %}
8146 ins_pipe(ialu_mem_reg);
8147 %}
8148
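// When compressed oops are in use with a null heap base, r12 (the heap-base
// register) is known to hold zero, so the zero-store forms below store r12
// instead of a zero immediate (see the "R12_heapbase==0" formats).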
8149 instruct storeImmP0(memory mem, immP0 zero)
8150 %{
8151 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8152 match(Set mem (StoreP mem zero));
8153
8154 ins_cost(125); // XXX
8155 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8156 ins_encode %{
8157 __ movq($mem$$Address, r12);
8158 %}
8159 ins_pipe(ialu_mem_reg);
8160 %}
8161
8162 // Store Null Pointer, mark word, or other simple pointer constant.
8163 instruct storeImmP(memory mem, immP31 src)
8164 %{
8165 predicate(n->as_Store()->barrier_data() == 0);
8166 match(Set mem (StoreP mem src));
8167
8168 ins_cost(150); // XXX
8169 format %{ "movq $mem, $src\t# ptr" %}
8170 ins_encode %{
8171 __ movq($mem$$Address, $src$$constant);
8172 %}
8173 ins_pipe(ialu_mem_imm);
8174 %}
8175
8176 // Store Compressed Pointer
8177 instruct storeN(memory mem, rRegN src)
8178 %{
8179 predicate(n->as_Store()->barrier_data() == 0);
8180 match(Set mem (StoreN mem src));
8181
8182 ins_cost(125); // XXX
8183 format %{ "movl $mem, $src\t# compressed ptr" %}
8184 ins_encode %{
8185 __ movl($mem$$Address, $src$$Register);
8186 %}
8187 ins_pipe(ialu_mem_reg);
8188 %}
8189
8190 instruct storeNKlass(memory mem, rRegN src)
8191 %{
8192 match(Set mem (StoreNKlass mem src));
8193
8194 ins_cost(125); // XXX
8195 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8196 ins_encode %{
8197 __ movl($mem$$Address, $src$$Register);
8198 %}
8199 ins_pipe(ialu_mem_reg);
8200 %}
8201
8202 instruct storeImmN0(memory mem, immN0 zero)
8203 %{
8204 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8205 match(Set mem (StoreN mem zero));
8206
8207 ins_cost(125); // XXX
8208 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8209 ins_encode %{
8210 __ movl($mem$$Address, r12);
8211 %}
8212 ins_pipe(ialu_mem_reg);
8213 %}
8214
8215 instruct storeImmN(memory mem, immN src)
8216 %{
8217 predicate(n->as_Store()->barrier_data() == 0);
8218 match(Set mem (StoreN mem src));
8219
8220 ins_cost(150); // XXX
8221 format %{ "movl $mem, $src\t# compressed ptr" %}
8222 ins_encode %{
8223 address con = (address)$src$$constant;
8224 if (con == nullptr) {
8225 __ movl($mem$$Address, 0);
8226 } else {
8227 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8228 }
8229 %}
8230 ins_pipe(ialu_mem_imm);
8231 %}
8232
8233 instruct storeImmNKlass(memory mem, immNKlass src)
8234 %{
8235 match(Set mem (StoreNKlass mem src));
8236
8237 ins_cost(150); // XXX
8238 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8239 ins_encode %{
8240 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8241 %}
8242 ins_pipe(ialu_mem_imm);
8243 %}
8244
8245 // Store Integer Immediate
8246 instruct storeImmI0(memory mem, immI_0 zero)
8247 %{
8248 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8249 match(Set mem (StoreI mem zero));
8250
8251 ins_cost(125); // XXX
8252 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8253 ins_encode %{
8254 __ movl($mem$$Address, r12);
8255 %}
8256 ins_pipe(ialu_mem_reg);
8257 %}
8258
8259 instruct storeImmI(memory mem, immI src)
8260 %{
8261 match(Set mem (StoreI mem src));
8262
8263 ins_cost(150);
8264 format %{ "movl $mem, $src\t# int" %}
8265 ins_encode %{
8266 __ movl($mem$$Address, $src$$constant);
8267 %}
8268 ins_pipe(ialu_mem_imm);
8269 %}
8270
8271 // Store Long Immediate
8272 instruct storeImmL0(memory mem, immL0 zero)
8273 %{
8274 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8275 match(Set mem (StoreL mem zero));
8276
8277 ins_cost(125); // XXX
8278 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8279 ins_encode %{
8280 __ movq($mem$$Address, r12);
8281 %}
8282 ins_pipe(ialu_mem_reg);
8283 %}
8284
8285 instruct storeImmL(memory mem, immL32 src)
8286 %{
8287 match(Set mem (StoreL mem src));
8288
8289 ins_cost(150);
8290 format %{ "movq $mem, $src\t# long" %}
8291 ins_encode %{
8292 __ movq($mem$$Address, $src$$constant);
8293 %}
8294 ins_pipe(ialu_mem_imm);
8295 %}
8296
8297 // Store Short/Char Immediate
8298 instruct storeImmC0(memory mem, immI_0 zero)
8299 %{
8300 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8301 match(Set mem (StoreC mem zero));
8302
8303 ins_cost(125); // XXX
8304 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8305 ins_encode %{
8306 __ movw($mem$$Address, r12);
8307 %}
8308 ins_pipe(ialu_mem_reg);
8309 %}
8310
8311 instruct storeImmI16(memory mem, immI16 src)
8312 %{
8313 predicate(UseStoreImmI16);
8314 match(Set mem (StoreC mem src));
8315
8316 ins_cost(150);
8317 format %{ "movw $mem, $src\t# short/char" %}
8318 ins_encode %{
8319 __ movw($mem$$Address, $src$$constant);
8320 %}
8321 ins_pipe(ialu_mem_imm);
8322 %}
8323
8324 // Store Byte Immediate
8325 instruct storeImmB0(memory mem, immI_0 zero)
8326 %{
8327 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8328 match(Set mem (StoreB mem zero));
8329
8330 ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8332 ins_encode %{
8333 __ movb($mem$$Address, r12);
8334 %}
8335 ins_pipe(ialu_mem_reg);
8336 %}
8337
8338 instruct storeImmB(memory mem, immI8 src)
8339 %{
8340 match(Set mem (StoreB mem src));
8341
8342 ins_cost(150); // XXX
8343 format %{ "movb $mem, $src\t# byte" %}
8344 ins_encode %{
8345 __ movb($mem$$Address, $src$$constant);
8346 %}
8347 ins_pipe(ialu_mem_imm);
8348 %}
8349
8350 // Store Float
8351 instruct storeF(memory mem, regF src)
8352 %{
8353 match(Set mem (StoreF mem src));
8354
8355 ins_cost(95); // XXX
8356 format %{ "movss $mem, $src\t# float" %}
8357 ins_encode %{
8358 __ movflt($mem$$Address, $src$$XMMRegister);
8359 %}
8360 ins_pipe(pipe_slow); // XXX
8361 %}
8362
// Store immediate float value (faster than storing from an XMM register)
8364 instruct storeF0(memory mem, immF0 zero)
8365 %{
8366 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8367 match(Set mem (StoreF mem zero));
8368
8369 ins_cost(25); // XXX
8370 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8371 ins_encode %{
8372 __ movl($mem$$Address, r12);
8373 %}
8374 ins_pipe(ialu_mem_reg);
8375 %}
8376
8377 instruct storeF_imm(memory mem, immF src)
8378 %{
8379 match(Set mem (StoreF mem src));
8380
8381 ins_cost(50);
8382 format %{ "movl $mem, $src\t# float" %}
8383 ins_encode %{
8384 __ movl($mem$$Address, jint_cast($src$$constant));
8385 %}
8386 ins_pipe(ialu_mem_imm);
8387 %}
8388
8389 // Store Double
8390 instruct storeD(memory mem, regD src)
8391 %{
8392 match(Set mem (StoreD mem src));
8393
8394 ins_cost(95); // XXX
8395 format %{ "movsd $mem, $src\t# double" %}
8396 ins_encode %{
8397 __ movdbl($mem$$Address, $src$$XMMRegister);
8398 %}
8399 ins_pipe(pipe_slow); // XXX
8400 %}
8401
// Store immediate double 0.0 (faster than storing from an XMM register)
8403 instruct storeD0_imm(memory mem, immD0 src)
8404 %{
8405 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8406 match(Set mem (StoreD mem src));
8407
8408 ins_cost(50);
8409 format %{ "movq $mem, $src\t# double 0." %}
8410 ins_encode %{
8411 __ movq($mem$$Address, $src$$constant);
8412 %}
8413 ins_pipe(ialu_mem_imm);
8414 %}
8415
8416 instruct storeD0(memory mem, immD0 zero)
8417 %{
8418 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8419 match(Set mem (StoreD mem zero));
8420
8421 ins_cost(25); // XXX
8422 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8423 ins_encode %{
8424 __ movq($mem$$Address, r12);
8425 %}
8426 ins_pipe(ialu_mem_reg);
8427 %}
8428
8429 instruct storeSSI(stackSlotI dst, rRegI src)
8430 %{
8431 match(Set dst src);
8432
8433 ins_cost(100);
8434 format %{ "movl $dst, $src\t# int stk" %}
8435 ins_encode %{
8436 __ movl($dst$$Address, $src$$Register);
8437 %}
8438 ins_pipe( ialu_mem_reg );
8439 %}
8440
8441 instruct storeSSL(stackSlotL dst, rRegL src)
8442 %{
8443 match(Set dst src);
8444
8445 ins_cost(100);
8446 format %{ "movq $dst, $src\t# long stk" %}
8447 ins_encode %{
8448 __ movq($dst$$Address, $src$$Register);
8449 %}
8450 ins_pipe(ialu_mem_reg);
8451 %}
8452
8453 instruct storeSSP(stackSlotP dst, rRegP src)
8454 %{
8455 match(Set dst src);
8456
8457 ins_cost(100);
8458 format %{ "movq $dst, $src\t# ptr stk" %}
8459 ins_encode %{
8460 __ movq($dst$$Address, $src$$Register);
8461 %}
8462 ins_pipe(ialu_mem_reg);
8463 %}
8464
8465 instruct storeSSF(stackSlotF dst, regF src)
8466 %{
8467 match(Set dst src);
8468
8469 ins_cost(95); // XXX
8470 format %{ "movss $dst, $src\t# float stk" %}
8471 ins_encode %{
8472 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8473 %}
8474 ins_pipe(pipe_slow); // XXX
8475 %}
8476
8477 instruct storeSSD(stackSlotD dst, regD src)
8478 %{
8479 match(Set dst src);
8480
8481 ins_cost(95); // XXX
8482 format %{ "movsd $dst, $src\t# double stk" %}
8483 ins_encode %{
8484 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8485 %}
8486 ins_pipe(pipe_slow); // XXX
8487 %}
8488
8489 instruct cacheWB(indirect addr)
8490 %{
8491 predicate(VM_Version::supports_data_cache_line_flush());
8492 match(CacheWB addr);
8493
8494 ins_cost(100);
8495 format %{"cache wb $addr" %}
8496 ins_encode %{
8497 assert($addr->index_position() < 0, "should be");
8498 assert($addr$$disp == 0, "should be");
8499 __ cache_wb(Address($addr$$base$$Register, 0));
8500 %}
8501 ins_pipe(pipe_slow); // XXX
8502 %}
8503
8504 instruct cacheWBPreSync()
8505 %{
8506 predicate(VM_Version::supports_data_cache_line_flush());
8507 match(CacheWBPreSync);
8508
8509 ins_cost(100);
8510 format %{"cache wb presync" %}
8511 ins_encode %{
8512 __ cache_wbsync(true);
8513 %}
8514 ins_pipe(pipe_slow); // XXX
8515 %}
8516
8517 instruct cacheWBPostSync()
8518 %{
8519 predicate(VM_Version::supports_data_cache_line_flush());
8520 match(CacheWBPostSync);
8521
8522 ins_cost(100);
8523 format %{"cache wb postsync" %}
8524 ins_encode %{
8525 __ cache_wbsync(false);
8526 %}
8527 ins_pipe(pipe_slow); // XXX
8528 %}
8529
8530 //----------BSWAP Instructions-------------------------------------------------
8531 instruct bytes_reverse_int(rRegI dst) %{
8532 match(Set dst (ReverseBytesI dst));
8533
8534 format %{ "bswapl $dst" %}
8535 ins_encode %{
8536 __ bswapl($dst$$Register);
8537 %}
8538 ins_pipe( ialu_reg );
8539 %}
8540
8541 instruct bytes_reverse_long(rRegL dst) %{
8542 match(Set dst (ReverseBytesL dst));
8543
8544 format %{ "bswapq $dst" %}
8545 ins_encode %{
8546 __ bswapq($dst$$Register);
8547 %}
8548 ins_pipe( ialu_reg);
8549 %}
8550
8551 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8552 match(Set dst (ReverseBytesUS dst));
8553 effect(KILL cr);
8554
8555 format %{ "bswapl $dst\n\t"
8556 "shrl $dst,16\n\t" %}
8557 ins_encode %{
8558 __ bswapl($dst$$Register);
8559 __ shrl($dst$$Register, 16);
8560 %}
8561 ins_pipe( ialu_reg );
8562 %}
8563
8564 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8565 match(Set dst (ReverseBytesS dst));
8566 effect(KILL cr);
8567
8568 format %{ "bswapl $dst\n\t"
            "sarl $dst,16\n\t" %}
8570 ins_encode %{
8571 __ bswapl($dst$$Register);
8572 __ sarl($dst$$Register, 16);
8573 %}
8574 ins_pipe( ialu_reg );
8575 %}
8576
8577 //---------- Zeros Count Instructions ------------------------------------------
8578
8579 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8580 predicate(UseCountLeadingZerosInstruction);
8581 match(Set dst (CountLeadingZerosI src));
8582 effect(KILL cr);
8583
8584 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8585 ins_encode %{
8586 __ lzcntl($dst$$Register, $src$$Register);
8587 %}
8588 ins_pipe(ialu_reg);
8589 %}
8590
8591 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8592 predicate(UseCountLeadingZerosInstruction);
8593 match(Set dst (CountLeadingZerosI (LoadI src)));
8594 effect(KILL cr);
8595 ins_cost(175);
8596 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8597 ins_encode %{
8598 __ lzcntl($dst$$Register, $src$$Address);
8599 %}
8600 ins_pipe(ialu_reg_mem);
8601 %}
8602
8603 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8604 predicate(!UseCountLeadingZerosInstruction);
8605 match(Set dst (CountLeadingZerosI src));
8606 effect(KILL cr);
8607
8608 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8609 "jnz skip\n\t"
8610 "movl $dst, -1\n"
8611 "skip:\n\t"
8612 "negl $dst\n\t"
8613 "addl $dst, 31" %}
8614 ins_encode %{
8615 Register Rdst = $dst$$Register;
8616 Register Rsrc = $src$$Register;
8617 Label skip;
8618 __ bsrl(Rdst, Rsrc);
8619 __ jccb(Assembler::notZero, skip);
8620 __ movl(Rdst, -1);
8621 __ bind(skip);
8622 __ negl(Rdst);
8623 __ addl(Rdst, BitsPerInt - 1);
8624 %}
8625 ins_pipe(ialu_reg);
8626 %}
8627
8628 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8629 predicate(UseCountLeadingZerosInstruction);
8630 match(Set dst (CountLeadingZerosL src));
8631 effect(KILL cr);
8632
8633 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8634 ins_encode %{
8635 __ lzcntq($dst$$Register, $src$$Register);
8636 %}
8637 ins_pipe(ialu_reg);
8638 %}
8639
8640 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8641 predicate(UseCountLeadingZerosInstruction);
8642 match(Set dst (CountLeadingZerosL (LoadL src)));
8643 effect(KILL cr);
8644 ins_cost(175);
8645 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8646 ins_encode %{
8647 __ lzcntq($dst$$Register, $src$$Address);
8648 %}
8649 ins_pipe(ialu_reg_mem);
8650 %}
8651
8652 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8653 predicate(!UseCountLeadingZerosInstruction);
8654 match(Set dst (CountLeadingZerosL src));
8655 effect(KILL cr);
8656
8657 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8658 "jnz skip\n\t"
8659 "movl $dst, -1\n"
8660 "skip:\n\t"
8661 "negl $dst\n\t"
8662 "addl $dst, 63" %}
8663 ins_encode %{
8664 Register Rdst = $dst$$Register;
8665 Register Rsrc = $src$$Register;
8666 Label skip;
8667 __ bsrq(Rdst, Rsrc);
8668 __ jccb(Assembler::notZero, skip);
8669 __ movl(Rdst, -1);
8670 __ bind(skip);
8671 __ negl(Rdst);
8672 __ addl(Rdst, BitsPerLong - 1);
8673 %}
8674 ins_pipe(ialu_reg);
8675 %}
8676
8677 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8678 predicate(UseCountTrailingZerosInstruction);
8679 match(Set dst (CountTrailingZerosI src));
8680 effect(KILL cr);
8681
8682 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8683 ins_encode %{
8684 __ tzcntl($dst$$Register, $src$$Register);
8685 %}
8686 ins_pipe(ialu_reg);
8687 %}
8688
8689 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8690 predicate(UseCountTrailingZerosInstruction);
8691 match(Set dst (CountTrailingZerosI (LoadI src)));
8692 effect(KILL cr);
8693 ins_cost(175);
8694 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8695 ins_encode %{
8696 __ tzcntl($dst$$Register, $src$$Address);
8697 %}
8698 ins_pipe(ialu_reg_mem);
8699 %}
8700
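// Fallback when tzcnt is unavailable: for a nonzero value, bsf already returns
// the index of the lowest set bit, which is exactly the trailing-zero count;
// only the zero case needs to be patched to BitsPerInt (or BitsPerLong) after
// the fact, hence the jccb/movl pair below.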
8701 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8702 predicate(!UseCountTrailingZerosInstruction);
8703 match(Set dst (CountTrailingZerosI src));
8704 effect(KILL cr);
8705
8706 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8707 "jnz done\n\t"
8708 "movl $dst, 32\n"
8709 "done:" %}
8710 ins_encode %{
8711 Register Rdst = $dst$$Register;
8712 Label done;
8713 __ bsfl(Rdst, $src$$Register);
8714 __ jccb(Assembler::notZero, done);
8715 __ movl(Rdst, BitsPerInt);
8716 __ bind(done);
8717 %}
8718 ins_pipe(ialu_reg);
8719 %}
8720
8721 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8722 predicate(UseCountTrailingZerosInstruction);
8723 match(Set dst (CountTrailingZerosL src));
8724 effect(KILL cr);
8725
8726 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8727 ins_encode %{
8728 __ tzcntq($dst$$Register, $src$$Register);
8729 %}
8730 ins_pipe(ialu_reg);
8731 %}
8732
8733 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8734 predicate(UseCountTrailingZerosInstruction);
8735 match(Set dst (CountTrailingZerosL (LoadL src)));
8736 effect(KILL cr);
8737 ins_cost(175);
8738 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8739 ins_encode %{
8740 __ tzcntq($dst$$Register, $src$$Address);
8741 %}
8742 ins_pipe(ialu_reg_mem);
8743 %}
8744
8745 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8746 predicate(!UseCountTrailingZerosInstruction);
8747 match(Set dst (CountTrailingZerosL src));
8748 effect(KILL cr);
8749
8750 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8751 "jnz done\n\t"
8752 "movl $dst, 64\n"
8753 "done:" %}
8754 ins_encode %{
8755 Register Rdst = $dst$$Register;
8756 Label done;
8757 __ bsfq(Rdst, $src$$Register);
8758 __ jccb(Assembler::notZero, done);
8759 __ movl(Rdst, BitsPerLong);
8760 __ bind(done);
8761 %}
8762 ins_pipe(ialu_reg);
8763 %}
8764
8765 //--------------- Reverse Operation Instructions ----------------
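// These reverse the bit order of a value. The GPR-only forms go through
// MacroAssembler::reverseI/reverseL; a classic shift-and-mask bit reversal of a
// 32-bit value looks roughly like the sketch below (illustrative only, the
// macro assembler may sequence it differently):
//
//   x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555);
//   x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333);
//   x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F);
//   x = byte_swap(x);   // finish by reversing byte order
//
// The _gfni variants can instead reverse the bits within each byte with a
// Galois-field affine transform (hence the XMM temporaries) and then byte-swap.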
8766 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8767 predicate(!VM_Version::supports_gfni());
8768 match(Set dst (ReverseI src));
8769 effect(TEMP dst, TEMP rtmp, KILL cr);
8770 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8771 ins_encode %{
8772 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8773 %}
8774 ins_pipe( ialu_reg );
8775 %}
8776
8777 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8778 predicate(VM_Version::supports_gfni());
8779 match(Set dst (ReverseI src));
8780 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8781 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8782 ins_encode %{
8783 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8784 %}
8785 ins_pipe( ialu_reg );
8786 %}
8787
8788 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8789 predicate(!VM_Version::supports_gfni());
8790 match(Set dst (ReverseL src));
8791 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8792 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8793 ins_encode %{
8794 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8795 %}
8796 ins_pipe( ialu_reg );
8797 %}
8798
8799 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8800 predicate(VM_Version::supports_gfni());
8801 match(Set dst (ReverseL src));
8802 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8803 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8804 ins_encode %{
8805 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8806 %}
8807 ins_pipe( ialu_reg );
8808 %}
8809
8810 //---------- Population Count Instructions -------------------------------------
8811
8812 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8813 predicate(UsePopCountInstruction);
8814 match(Set dst (PopCountI src));
8815 effect(KILL cr);
8816
8817 format %{ "popcnt $dst, $src" %}
8818 ins_encode %{
8819 __ popcntl($dst$$Register, $src$$Register);
8820 %}
8821 ins_pipe(ialu_reg);
8822 %}
8823
8824 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8825 predicate(UsePopCountInstruction);
8826 match(Set dst (PopCountI (LoadI mem)));
8827 effect(KILL cr);
8828
8829 format %{ "popcnt $dst, $mem" %}
8830 ins_encode %{
8831 __ popcntl($dst$$Register, $mem$$Address);
8832 %}
8833 ins_pipe(ialu_reg);
8834 %}
8835
8836 // Note: Long.bitCount(long) returns an int.
8837 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8838 predicate(UsePopCountInstruction);
8839 match(Set dst (PopCountL src));
8840 effect(KILL cr);
8841
8842 format %{ "popcnt $dst, $src" %}
8843 ins_encode %{
8844 __ popcntq($dst$$Register, $src$$Register);
8845 %}
8846 ins_pipe(ialu_reg);
8847 %}
8848
8849 // Note: Long.bitCount(long) returns an int.
8850 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8851 predicate(UsePopCountInstruction);
8852 match(Set dst (PopCountL (LoadL mem)));
8853 effect(KILL cr);
8854
8855 format %{ "popcnt $dst, $mem" %}
8856 ins_encode %{
8857 __ popcntq($dst$$Register, $mem$$Address);
8858 %}
8859 ins_pipe(ialu_reg);
8860 %}
8861
8862
8863 //----------MemBar Instructions-----------------------------------------------
8864 // Memory barrier flavors
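// On x86 the hardware memory model (TSO) already orders load-load, load-store
// and store-store, so acquire, release and storestore barriers need no code and
// are matched to empty encodings below. Only StoreLoad requires an instruction;
// a locked add of zero to the top of the stack is used because it has the
// required fencing effect and is typically cheaper than mfence.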
8865
8866 instruct membar_acquire()
8867 %{
8868 match(MemBarAcquire);
8869 match(LoadFence);
8870 ins_cost(0);
8871
8872 size(0);
8873 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8874 ins_encode();
8875 ins_pipe(empty);
8876 %}
8877
8878 instruct membar_acquire_lock()
8879 %{
8880 match(MemBarAcquireLock);
8881 ins_cost(0);
8882
8883 size(0);
8884 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8885 ins_encode();
8886 ins_pipe(empty);
8887 %}
8888
8889 instruct membar_release()
8890 %{
8891 match(MemBarRelease);
8892 match(StoreFence);
8893 ins_cost(0);
8894
8895 size(0);
8896 format %{ "MEMBAR-release ! (empty encoding)" %}
8897 ins_encode();
8898 ins_pipe(empty);
8899 %}
8900
8901 instruct membar_release_lock()
8902 %{
8903 match(MemBarReleaseLock);
8904 ins_cost(0);
8905
8906 size(0);
8907 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8908 ins_encode();
8909 ins_pipe(empty);
8910 %}
8911
8912 instruct membar_storeload(rFlagsReg cr) %{
8913 match(MemBarStoreLoad);
8914 effect(KILL cr);
8915 ins_cost(400);
8916
8917 format %{
8918 $$template
8919 $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
8920 %}
8921 ins_encode %{
8922 __ membar(Assembler::StoreLoad);
8923 %}
8924 ins_pipe(pipe_slow);
8925 %}
8926
8927 instruct membar_volatile(rFlagsReg cr) %{
8928 match(MemBarVolatile);
8929 effect(KILL cr);
8930 ins_cost(400);
8931
8932 format %{
8933 $$template
8934 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8935 %}
8936 ins_encode %{
8937 __ membar(Assembler::StoreLoad);
8938 %}
8939 ins_pipe(pipe_slow);
8940 %}
8941
8942 instruct unnecessary_membar_volatile()
8943 %{
8944 match(MemBarVolatile);
8945 predicate(Matcher::post_store_load_barrier(n));
8946 ins_cost(0);
8947
8948 size(0);
8949 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8950 ins_encode();
8951 ins_pipe(empty);
8952 %}
8953
8954 instruct membar_full(rFlagsReg cr) %{
8955 match(MemBarFull);
8956 effect(KILL cr);
8957 ins_cost(400);
8958
8959 format %{
8960 $$template
8961 $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
8962 %}
8963 ins_encode %{
8964 __ membar(Assembler::StoreLoad);
8965 %}
8966 ins_pipe(pipe_slow);
8967 %}
8968
8969 instruct membar_storestore() %{
8970 match(MemBarStoreStore);
8971 match(StoreStoreFence);
8972 ins_cost(0);
8973
8974 size(0);
8975 format %{ "MEMBAR-storestore (empty encoding)" %}
8976 ins_encode( );
8977 ins_pipe(empty);
8978 %}
8979
8980 //----------Move Instructions--------------------------------------------------
8981
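// The cast nodes below only change the type C2 tracks for a value; at the
// machine level they are plain register-to-register moves, and when the
// allocator assigns $dst and $src to the same register no code is emitted at
// all (note the $dst$$reg != $src$$reg guards).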
8982 instruct castX2P(rRegP dst, rRegL src)
8983 %{
8984 match(Set dst (CastX2P src));
8985
8986 format %{ "movq $dst, $src\t# long->ptr" %}
8987 ins_encode %{
8988 if ($dst$$reg != $src$$reg) {
8989 __ movptr($dst$$Register, $src$$Register);
8990 }
8991 %}
8992 ins_pipe(ialu_reg_reg); // XXX
8993 %}
8994
8995 instruct castI2N(rRegN dst, rRegI src)
8996 %{
8997 match(Set dst (CastI2N src));
8998
8999   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
9000 ins_encode %{
9001 if ($dst$$reg != $src$$reg) {
9002 __ movl($dst$$Register, $src$$Register);
9003 }
9004 %}
9005 ins_pipe(ialu_reg_reg); // XXX
9006 %}
9007
9008 instruct castN2X(rRegL dst, rRegN src)
9009 %{
9010 match(Set dst (CastP2X src));
9011
9012   format %{ "movq    $dst, $src\t# narrow ptr -> long" %}
9013 ins_encode %{
9014 if ($dst$$reg != $src$$reg) {
9015 __ movptr($dst$$Register, $src$$Register);
9016 }
9017 %}
9018 ins_pipe(ialu_reg_reg); // XXX
9019 %}
9020
9021 instruct castP2X(rRegL dst, rRegP src)
9022 %{
9023 match(Set dst (CastP2X src));
9024
9025 format %{ "movq $dst, $src\t# ptr -> long" %}
9026 ins_encode %{
9027 if ($dst$$reg != $src$$reg) {
9028 __ movptr($dst$$Register, $src$$Register);
9029 }
9030 %}
9031 ins_pipe(ialu_reg_reg); // XXX
9032 %}
9033
9034 // Convert oop into int for vectors alignment masking
9035 instruct convP2I(rRegI dst, rRegP src)
9036 %{
9037 match(Set dst (ConvL2I (CastP2X src)));
9038
9039 format %{ "movl $dst, $src\t# ptr -> int" %}
9040 ins_encode %{
9041 __ movl($dst$$Register, $src$$Register);
9042 %}
9043 ins_pipe(ialu_reg_reg); // XXX
9044 %}
9045
9046 // Convert compressed oop into int for vectors alignment masking
9047 // in case of 32bit oops (heap < 4Gb).
9048 instruct convN2I(rRegI dst, rRegN src)
9049 %{
9050 predicate(CompressedOops::shift() == 0);
9051 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9052
9053 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
9054 ins_encode %{
9055 __ movl($dst$$Register, $src$$Register);
9056 %}
9057 ins_pipe(ialu_reg_reg); // XXX
9058 %}
9059
9060 // Convert oop pointer into compressed form
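// Rough shape of the transformation (null maps to 0; the base and shift come
// from CompressedOops and either may be zero depending on heap size/placement):
//   narrow = (u4)((oop - CompressedOops::base()) >> CompressedOops::shift());
//   oop    = CompressedOops::base() + ((u8)narrow << CompressedOops::shift());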
9061 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
9062 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9063 match(Set dst (EncodeP src));
9064 effect(KILL cr);
9065 format %{ "encode_heap_oop $dst,$src" %}
9066 ins_encode %{
9067 Register s = $src$$Register;
9068 Register d = $dst$$Register;
9069 if (s != d) {
9070 __ movq(d, s);
9071 }
9072 __ encode_heap_oop(d);
9073 %}
9074 ins_pipe(ialu_reg_long);
9075 %}
9076
9077 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9078 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
9079 match(Set dst (EncodeP src));
9080 effect(KILL cr);
9081 format %{ "encode_heap_oop_not_null $dst,$src" %}
9082 ins_encode %{
9083 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9084 %}
9085 ins_pipe(ialu_reg_long);
9086 %}
9087
9088 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
9089 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9090 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9091 match(Set dst (DecodeN src));
9092 effect(KILL cr);
9093 format %{ "decode_heap_oop $dst,$src" %}
9094 ins_encode %{
9095 Register s = $src$$Register;
9096 Register d = $dst$$Register;
9097 if (s != d) {
9098 __ movq(d, s);
9099 }
9100 __ decode_heap_oop(d);
9101 %}
9102 ins_pipe(ialu_reg_long);
9103 %}
9104
9105 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9106 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9107 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9108 match(Set dst (DecodeN src));
9109 effect(KILL cr);
9110 format %{ "decode_heap_oop_not_null $dst,$src" %}
9111 ins_encode %{
9112 Register s = $src$$Register;
9113 Register d = $dst$$Register;
9114 if (s != d) {
9115 __ decode_heap_oop_not_null(d, s);
9116 } else {
9117 __ decode_heap_oop_not_null(d);
9118 }
9119 %}
9120 ins_pipe(ialu_reg_long);
9121 %}
9122
9123 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9124 match(Set dst (EncodePKlass src));
9125 effect(TEMP dst, KILL cr);
9126 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9127 ins_encode %{
9128 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9129 %}
9130 ins_pipe(ialu_reg_long);
9131 %}
9132
9133 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9134 match(Set dst (DecodeNKlass src));
9135 effect(TEMP dst, KILL cr);
9136 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9137 ins_encode %{
9138 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9139 %}
9140 ins_pipe(ialu_reg_long);
9141 %}
9142
9143 //----------Conditional Move---------------------------------------------------
9144 // Jump
9145 // dummy instruction for generating temp registers
9146 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9147 match(Jump (LShiftL switch_val shift));
9148 ins_cost(350);
9149 predicate(false);
9150 effect(TEMP dest);
9151
9152 format %{ "leaq $dest, [$constantaddress]\n\t"
9153 "jmp [$dest + $switch_val << $shift]\n\t" %}
9154 ins_encode %{
9155 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9156 // to do that and the compiler is using that register as one it can allocate.
9157 // So we build it all by hand.
9158 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9159 // ArrayAddress dispatch(table, index);
9160 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9161 __ lea($dest$$Register, $constantaddress);
9162 __ jmp(dispatch);
9163 %}
9164 ins_pipe(pipe_jmp);
9165 %}
9166
9167 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9168 match(Jump (AddL (LShiftL switch_val shift) offset));
9169 ins_cost(350);
9170 effect(TEMP dest);
9171
9172 format %{ "leaq $dest, [$constantaddress]\n\t"
9173 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9174 ins_encode %{
9175 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9176 // to do that and the compiler is using that register as one it can allocate.
9177 // So we build it all by hand.
9178 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9179 // ArrayAddress dispatch(table, index);
9180 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9181 __ lea($dest$$Register, $constantaddress);
9182 __ jmp(dispatch);
9183 %}
9184 ins_pipe(pipe_jmp);
9185 %}
9186
9187 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9188 match(Jump switch_val);
9189 ins_cost(350);
9190 effect(TEMP dest);
9191
9192 format %{ "leaq $dest, [$constantaddress]\n\t"
9193 "jmp [$dest + $switch_val]\n\t" %}
9194 ins_encode %{
9195 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9196 // to do that and the compiler is using that register as one it can allocate.
9197 // So we build it all by hand.
9198 // Address index(noreg, switch_reg, Address::times_1);
9199 // ArrayAddress dispatch(table, index);
9200 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9201 __ lea($dest$$Register, $constantaddress);
9202 __ jmp(dispatch);
9203 %}
9204 ins_pipe(pipe_jmp);
9205 %}
9206
9207 // Conditional move
9208 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9209 %{
9210 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9211 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9212
9213 ins_cost(100); // XXX
9214 format %{ "setbn$cop $dst\t# signed, int" %}
9215 ins_encode %{
9216 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9217 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9218 %}
9219 ins_pipe(ialu_reg);
9220 %}
9221
9222 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9223 %{
9224 predicate(!UseAPX);
9225 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9226
9227 ins_cost(200); // XXX
9228 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9229 ins_encode %{
9230 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9231 %}
9232 ins_pipe(pipe_cmov_reg);
9233 %}
9234
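// APX "new data destination" (ndd) forms: ecmovl/ecmovq write their result to a
// separate destination register, so the conditional move no longer has to
// overwrite one of its inputs and the matcher can accept (Binary src1 src2)
// rather than tying $dst to the first input.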
9235 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9236 %{
9237 predicate(UseAPX);
9238 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9239
9240 ins_cost(200);
9241 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9242 ins_encode %{
9243 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9244 %}
9245 ins_pipe(pipe_cmov_reg);
9246 %}
9247
9248 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9249 %{
9250 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9251 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9252
9253 ins_cost(100); // XXX
9254 format %{ "setbn$cop $dst\t# unsigned, int" %}
9255 ins_encode %{
9256 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9257 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9258 %}
9259 ins_pipe(ialu_reg);
9260 %}
9261
9262 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9263 predicate(!UseAPX);
9264 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9265
9266 ins_cost(200); // XXX
9267 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9268 ins_encode %{
9269 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9270 %}
9271 ins_pipe(pipe_cmov_reg);
9272 %}
9273
9274 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9275 predicate(UseAPX);
9276 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9277
9278 ins_cost(200);
9279 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9280 ins_encode %{
9281 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9282 %}
9283 ins_pipe(pipe_cmov_reg);
9284 %}
9285
9286 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9287 %{
9288 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9289 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9290
9291 ins_cost(100); // XXX
9292 format %{ "setbn$cop $dst\t# unsigned, int" %}
9293 ins_encode %{
9294 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9295 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9296 %}
9297 ins_pipe(ialu_reg);
9298 %}
9299
9300 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9301 %{
9302 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9303 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9304
9305 ins_cost(100); // XXX
9306 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9307 ins_encode %{
9308 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9309 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9310 %}
9311 ins_pipe(ialu_reg);
9312 %}
9313
9314 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9315 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9316
9317 ins_cost(200);
9318 expand %{
9319 cmovI_regU(cop, cr, dst, src);
9320 %}
9321 %}
9322
9323 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9324 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9325
9326 ins_cost(200);
9327 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9328 ins_encode %{
9329 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9330 %}
9331 ins_pipe(pipe_cmov_reg);
9332 %}
9333
9334 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9335 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9336 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9337
9338 ins_cost(200); // XXX
9339 format %{ "cmovpl $dst, $src\n\t"
9340 "cmovnel $dst, $src" %}
9341 ins_encode %{
9342 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9343 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9344 %}
9345 ins_pipe(pipe_cmov_reg);
9346 %}
9347
9348 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9349 // inputs of the CMove
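// Concretely: for these unordered-compare flags, "not equal" must also be taken
// when the compare was unordered (PF set), so the _ne form simply does
// cmovp + cmovne into $dst. An eq-select would need "ZF set and PF clear",
// which two cmovs into the same destination cannot express; instead the _eq
// rule below swaps the IfTrue/IfFalse inputs ((Binary src dst) rather than
// (Binary dst src)) and reuses the same cmovp/cmovne pair.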
9350 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9351 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9352 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9353 effect(TEMP dst);
9354
9355 ins_cost(200); // XXX
9356 format %{ "cmovpl $dst, $src\n\t"
9357 "cmovnel $dst, $src" %}
9358 ins_encode %{
9359 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9360 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9361 %}
9362 ins_pipe(pipe_cmov_reg);
9363 %}
9364
9365 // Conditional move
9366 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9367 predicate(!UseAPX);
9368 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9369
9370 ins_cost(250); // XXX
9371 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9372 ins_encode %{
9373 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9374 %}
9375 ins_pipe(pipe_cmov_mem);
9376 %}
9377
9378 // Conditional move
9379 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9380 %{
9381 predicate(UseAPX);
9382 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9383
9384 ins_cost(250);
9385 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9386 ins_encode %{
9387 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9388 %}
9389 ins_pipe(pipe_cmov_mem);
9390 %}
9391
9392 // Conditional move
9393 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9394 %{
9395 predicate(!UseAPX);
9396 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9397
9398 ins_cost(250); // XXX
9399 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9400 ins_encode %{
9401 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9402 %}
9403 ins_pipe(pipe_cmov_mem);
9404 %}
9405
9406 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9407 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9408
9409 ins_cost(250);
9410 expand %{
9411 cmovI_memU(cop, cr, dst, src);
9412 %}
9413 %}
9414
9415 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9416 %{
9417 predicate(UseAPX);
9418 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9419
9420 ins_cost(250);
9421 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9422 ins_encode %{
9423 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9424 %}
9425 ins_pipe(pipe_cmov_mem);
9426 %}
9427
9428 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9429 %{
9430 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9431
9432 ins_cost(250);
9433 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9434 ins_encode %{
9435 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9436 %}
9437 ins_pipe(pipe_cmov_mem);
9438 %}
9439
9440 // Conditional move
9441 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9442 %{
9443 predicate(!UseAPX);
9444 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9445
9446 ins_cost(200); // XXX
9447 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9448 ins_encode %{
9449 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9450 %}
9451 ins_pipe(pipe_cmov_reg);
9452 %}
9453
9454 // Conditional move ndd
9455 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9456 %{
9457 predicate(UseAPX);
9458 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9459
9460 ins_cost(200);
9461 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9462 ins_encode %{
9463 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9464 %}
9465 ins_pipe(pipe_cmov_reg);
9466 %}
9467
9468 // Conditional move
9469 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9470 %{
9471 predicate(!UseAPX);
9472 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9473
9474 ins_cost(200); // XXX
9475 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9476 ins_encode %{
9477 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9478 %}
9479 ins_pipe(pipe_cmov_reg);
9480 %}
9481
9482 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9483 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9484
9485 ins_cost(200);
9486 expand %{
9487 cmovN_regU(cop, cr, dst, src);
9488 %}
9489 %}
9490
9491 // Conditional move ndd
9492 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9493 %{
9494 predicate(UseAPX);
9495 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9496
9497 ins_cost(200);
9498 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9499 ins_encode %{
9500 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9501 %}
9502 ins_pipe(pipe_cmov_reg);
9503 %}
9504
9505 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9506 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9507
9508 ins_cost(200);
9509 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9510 ins_encode %{
9511 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9512 %}
9513 ins_pipe(pipe_cmov_reg);
9514 %}
9515
9516 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9517 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9518 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9519
9520 ins_cost(200); // XXX
9521 format %{ "cmovpl $dst, $src\n\t"
9522 "cmovnel $dst, $src" %}
9523 ins_encode %{
9524 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9525 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9526 %}
9527 ins_pipe(pipe_cmov_reg);
9528 %}
9529
9530 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9531 // inputs of the CMove
9532 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9533 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9534 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9535
9536 ins_cost(200); // XXX
9537 format %{ "cmovpl $dst, $src\n\t"
9538 "cmovnel $dst, $src" %}
9539 ins_encode %{
9540 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9541 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9542 %}
9543 ins_pipe(pipe_cmov_reg);
9544 %}
9545
9546 // Conditional move
9547 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9548 %{
9549 predicate(!UseAPX);
9550 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9551
9552 ins_cost(200); // XXX
9553 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9554 ins_encode %{
9555 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9556 %}
9557 ins_pipe(pipe_cmov_reg); // XXX
9558 %}
9559
9560 // Conditional move ndd
9561 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9562 %{
9563 predicate(UseAPX);
9564 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9565
9566 ins_cost(200);
9567 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9568 ins_encode %{
9569 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9570 %}
9571 ins_pipe(pipe_cmov_reg);
9572 %}
9573
9574 // Conditional move
9575 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9576 %{
9577 predicate(!UseAPX);
9578 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9579
9580 ins_cost(200); // XXX
9581 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9582 ins_encode %{
9583 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9584 %}
9585 ins_pipe(pipe_cmov_reg); // XXX
9586 %}
9587
9588 // Conditional move ndd
9589 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9590 %{
9591 predicate(UseAPX);
9592 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9593
9594 ins_cost(200);
9595 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9596 ins_encode %{
9597 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9598 %}
9599 ins_pipe(pipe_cmov_reg);
9600 %}
9601
9602 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9603 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9604
9605 ins_cost(200);
9606 expand %{
9607 cmovP_regU(cop, cr, dst, src);
9608 %}
9609 %}
9610
9611 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9612 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9613
9614 ins_cost(200);
9615 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9616 ins_encode %{
9617 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9618 %}
9619 ins_pipe(pipe_cmov_reg);
9620 %}
9621
9622 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9623 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9624 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9625
9626 ins_cost(200); // XXX
9627 format %{ "cmovpq $dst, $src\n\t"
9628 "cmovneq $dst, $src" %}
9629 ins_encode %{
9630 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9631 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9632 %}
9633 ins_pipe(pipe_cmov_reg);
9634 %}
9635
9636 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9637 // inputs of the CMove
9638 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9639 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9640 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9641
9642 ins_cost(200); // XXX
9643 format %{ "cmovpq $dst, $src\n\t"
9644 "cmovneq $dst, $src" %}
9645 ins_encode %{
9646 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9647 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9648 %}
9649 ins_pipe(pipe_cmov_reg);
9650 %}
9651
9652 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9653 %{
9654 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9655 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9656
9657 ins_cost(100); // XXX
9658 format %{ "setbn$cop $dst\t# signed, long" %}
9659 ins_encode %{
9660 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9661 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9662 %}
9663 ins_pipe(ialu_reg);
9664 %}
9665
9666 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9667 %{
9668 predicate(!UseAPX);
9669 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9670
9671 ins_cost(200); // XXX
9672 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9673 ins_encode %{
9674 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9675 %}
9676 ins_pipe(pipe_cmov_reg); // XXX
9677 %}
9678
9679 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9680 %{
9681 predicate(UseAPX);
9682 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9683
9684 ins_cost(200);
9685 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9686 ins_encode %{
9687 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9688 %}
9689 ins_pipe(pipe_cmov_reg);
9690 %}
9691
9692 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9693 %{
9694 predicate(!UseAPX);
9695 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9696
9697 ins_cost(200); // XXX
9698 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9699 ins_encode %{
9700 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9701 %}
9702 ins_pipe(pipe_cmov_mem); // XXX
9703 %}
9704
9705 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9706 %{
9707 predicate(UseAPX);
9708 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9709
9710 ins_cost(200);
9711 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9712 ins_encode %{
9713 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9714 %}
9715 ins_pipe(pipe_cmov_mem);
9716 %}
9717
9718 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9719 %{
9720 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9721 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9722
9723 ins_cost(100); // XXX
9724 format %{ "setbn$cop $dst\t# unsigned, long" %}
9725 ins_encode %{
9726 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9727 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9728 %}
9729 ins_pipe(ialu_reg);
9730 %}
9731
9732 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9733 %{
9734 predicate(!UseAPX);
9735 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9736
9737 ins_cost(200); // XXX
9738 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9739 ins_encode %{
9740 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9741 %}
9742 ins_pipe(pipe_cmov_reg); // XXX
9743 %}
9744
9745 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9746 %{
9747 predicate(UseAPX);
9748 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9749
9750 ins_cost(200);
9751 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9752 ins_encode %{
9753 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9754 %}
9755 ins_pipe(pipe_cmov_reg);
9756 %}
9757
9758 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9759 %{
9760 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9761 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9762
9763 ins_cost(100); // XXX
9764 format %{ "setbn$cop $dst\t# unsigned, long" %}
9765 ins_encode %{
9766 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9767 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9768 %}
9769 ins_pipe(ialu_reg);
9770 %}
9771
9772 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9773 %{
9774 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9775 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9776
9777 ins_cost(100); // XXX
9778 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9779 ins_encode %{
9780 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9781 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9782 %}
9783 ins_pipe(ialu_reg);
9784 %}
9785
9786 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9787 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9788
9789 ins_cost(200);
9790 expand %{
9791 cmovL_regU(cop, cr, dst, src);
9792 %}
9793 %}
9794
9795 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9796 %{
9797 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9798
9799 ins_cost(200);
9800 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9801 ins_encode %{
9802 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9803 %}
9804 ins_pipe(pipe_cmov_reg);
9805 %}
9806
9807 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9808 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9809 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9810
9811 ins_cost(200); // XXX
9812 format %{ "cmovpq $dst, $src\n\t"
9813 "cmovneq $dst, $src" %}
9814 ins_encode %{
9815 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9816 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9817 %}
9818 ins_pipe(pipe_cmov_reg);
9819 %}
9820
9821 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9822 // inputs of the CMove
9823 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9824 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9825 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9826
9827 ins_cost(200); // XXX
9828 format %{ "cmovpq $dst, $src\n\t"
9829 "cmovneq $dst, $src" %}
9830 ins_encode %{
9831 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9832 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9833 %}
9834 ins_pipe(pipe_cmov_reg);
9835 %}
9836
9837 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9838 %{
9839 predicate(!UseAPX);
9840 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9841
9842 ins_cost(200); // XXX
9843 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9844 ins_encode %{
9845 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9846 %}
9847 ins_pipe(pipe_cmov_mem); // XXX
9848 %}
9849
9850 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9851 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9852
9853 ins_cost(200);
9854 expand %{
9855 cmovL_memU(cop, cr, dst, src);
9856 %}
9857 %}
9858
9859 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9860 %{
9861 predicate(UseAPX);
9862 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9863
9864 ins_cost(200);
9865 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9866 ins_encode %{
9867 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9868 %}
9869 ins_pipe(pipe_cmov_mem);
9870 %}
9871
9872 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9873 %{
9874 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9875
9876 ins_cost(200);
9877 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9878 ins_encode %{
9879 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9880 %}
9881 ins_pipe(pipe_cmov_mem);
9882 %}
9883
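// There is no cmov encoding for XMM registers, so float/double conditional
// moves are a short branch around a register move. The branch tests the negated
// condition ($cop$$cmpcode ^ 1), so the move is executed exactly when the CMove
// condition holds.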
9884 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9885 %{
9886 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9887
9888 ins_cost(200); // XXX
9889 format %{ "jn$cop skip\t# signed cmove float\n\t"
9890 "movss $dst, $src\n"
9891 "skip:" %}
9892 ins_encode %{
9893 Label Lskip;
9894 // Invert sense of branch from sense of CMOV
9895 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9896 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9897 __ bind(Lskip);
9898 %}
9899 ins_pipe(pipe_slow);
9900 %}
9901
9902 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9903 %{
9904 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9905
9906 ins_cost(200); // XXX
9907 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9908 "movss $dst, $src\n"
9909 "skip:" %}
9910 ins_encode %{
9911 Label Lskip;
9912 // Invert sense of branch from sense of CMOV
9913 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9914 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9915 __ bind(Lskip);
9916 %}
9917 ins_pipe(pipe_slow);
9918 %}
9919
9920 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9921 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9922
9923 ins_cost(200);
9924 expand %{
9925 cmovF_regU(cop, cr, dst, src);
9926 %}
9927 %}
9928
9929 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9930 %{
9931 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9932
9933 ins_cost(200); // XXX
9934 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9935 "movss $dst, $src\n"
9936 "skip:" %}
9937 ins_encode %{
9938 Label Lskip;
9939 // Invert sense of branch from sense of CMOV
9940 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9941 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9942 __ bind(Lskip);
9943 %}
9944 ins_pipe(pipe_slow);
9945 %}
9946
9947 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9948 %{
9949 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9950
9951 ins_cost(200); // XXX
9952 format %{ "jn$cop skip\t# signed cmove double\n\t"
9953 "movsd $dst, $src\n"
9954 "skip:" %}
9955 ins_encode %{
9956 Label Lskip;
9957 // Invert sense of branch from sense of CMOV
9958 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9959 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9960 __ bind(Lskip);
9961 %}
9962 ins_pipe(pipe_slow);
9963 %}
9964
9965 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9966 %{
9967 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9968
9969 ins_cost(200); // XXX
9970 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9971 "movsd $dst, $src\n"
9972 "skip:" %}
9973 ins_encode %{
9974 Label Lskip;
9975 // Invert sense of branch from sense of CMOV
9976 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9977 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9978 __ bind(Lskip);
9979 %}
9980 ins_pipe(pipe_slow);
9981 %}
9982
9983 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9984 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9985
9986 ins_cost(200);
9987 expand %{
9988 cmovD_regU(cop, cr, dst, src);
9989 %}
9990 %}
9991
9992 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9993 %{
9994 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9995
9996 ins_cost(200); // XXX
9997 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9998 "movsd $dst, $src\n"
9999 "skip:" %}
10000 ins_encode %{
10001 Label Lskip;
10002 // Invert sense of branch from sense of CMOV
10003 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
10004 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
10005 __ bind(Lskip);
10006 %}
10007 ins_pipe(pipe_slow);
10008 %}
10009
10010 //----------Arithmetic Instructions--------------------------------------------
10011 //----------Addition Instructions----------------------------------------------
10012
10013 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10014 %{
10015 predicate(!UseAPX);
10016 match(Set dst (AddI dst src));
10017 effect(KILL cr);
10018 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10019 format %{ "addl $dst, $src\t# int" %}
10020 ins_encode %{
10021 __ addl($dst$$Register, $src$$Register);
10022 %}
10023 ins_pipe(ialu_reg_reg);
10024 %}
10025
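// Hedged note on the ndd arithmetic forms: eaddl/eaddq take an explicit
// destination (three-operand APX encoding). The PD::Flag_ndd_demotable_opr*
// hints appear to mark which source operand the destination may be merged with
// so the instruction can be demoted to the shorter legacy two-operand encoding;
// the trailing 'false' argument presumably selects the flag-setting (non-NF)
// variant, consistent with the effect(KILL cr) on these rules.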
10026 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10027 %{
10028 predicate(UseAPX);
10029 match(Set dst (AddI src1 src2));
10030 effect(KILL cr);
10031 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10032
10033 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10034 ins_encode %{
10035 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
10036 %}
10037 ins_pipe(ialu_reg_reg);
10038 %}
10039
10040 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10041 %{
10042 predicate(!UseAPX);
10043 match(Set dst (AddI dst src));
10044 effect(KILL cr);
10045 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10046
10047 format %{ "addl $dst, $src\t# int" %}
10048 ins_encode %{
10049 __ addl($dst$$Register, $src$$constant);
10050 %}
10051 ins_pipe( ialu_reg );
10052 %}
10053
10054 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10055 %{
10056 predicate(UseAPX);
10057 match(Set dst (AddI src1 src2));
10058 effect(KILL cr);
10059 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10060
10061 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10062 ins_encode %{
10063 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
10064 %}
10065 ins_pipe( ialu_reg );
10066 %}
10067
10068 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10069 %{
10070 predicate(UseAPX);
10071 match(Set dst (AddI (LoadI src1) src2));
10072 effect(KILL cr);
10073 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10074
10075 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10076 ins_encode %{
10077 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10078 %}
10079 ins_pipe( ialu_reg );
10080 %}
10081
10082 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10083 %{
10084 predicate(!UseAPX);
10085 match(Set dst (AddI dst (LoadI src)));
10086 effect(KILL cr);
10087 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10088
10089 ins_cost(150); // XXX
10090 format %{ "addl $dst, $src\t# int" %}
10091 ins_encode %{
10092 __ addl($dst$$Register, $src$$Address);
10093 %}
10094 ins_pipe(ialu_reg_mem);
10095 %}
10096
10097 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10098 %{
10099 predicate(UseAPX);
10100 match(Set dst (AddI src1 (LoadI src2)));
10101 effect(KILL cr);
10102 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10103
10104 ins_cost(150);
10105 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10106 ins_encode %{
10107 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10108 %}
10109 ins_pipe(ialu_reg_mem);
10110 %}
10111
10112 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10113 %{
10114 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10115 effect(KILL cr);
10116 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10117
10118 ins_cost(150); // XXX
10119 format %{ "addl $dst, $src\t# int" %}
10120 ins_encode %{
10121 __ addl($dst$$Address, $src$$Register);
10122 %}
10123 ins_pipe(ialu_mem_reg);
10124 %}
10125
10126 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10127 %{
10128 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10129 effect(KILL cr);
10130 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10131
10133 ins_cost(125); // XXX
10134 format %{ "addl $dst, $src\t# int" %}
10135 ins_encode %{
10136 __ addl($dst$$Address, $src$$constant);
10137 %}
10138 ins_pipe(ialu_mem_imm);
10139 %}
10140
10141 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10142 %{
10143 predicate(!UseAPX && UseIncDec);
10144 match(Set dst (AddI dst src));
10145 effect(KILL cr);
10146
10147 format %{ "incl $dst\t# int" %}
10148 ins_encode %{
10149 __ incrementl($dst$$Register);
10150 %}
10151 ins_pipe(ialu_reg);
10152 %}
10153
10154 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10155 %{
10156 predicate(UseAPX && UseIncDec);
10157 match(Set dst (AddI src val));
10158 effect(KILL cr);
10159 flag(PD::Flag_ndd_demotable_opr1);
10160
10161 format %{ "eincl $dst, $src\t# int ndd" %}
10162 ins_encode %{
10163 __ eincl($dst$$Register, $src$$Register, false);
10164 %}
10165 ins_pipe(ialu_reg);
10166 %}
10167
10168 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10169 %{
10170 predicate(UseAPX && UseIncDec);
10171 match(Set dst (AddI (LoadI src) val));
10172 effect(KILL cr);
10173
10174 format %{ "eincl $dst, $src\t# int ndd" %}
10175 ins_encode %{
10176 __ eincl($dst$$Register, $src$$Address, false);
10177 %}
10178 ins_pipe(ialu_reg);
10179 %}
10180
10181 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10182 %{
10183 predicate(UseIncDec);
10184 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10185 effect(KILL cr);
10186
10187 ins_cost(125); // XXX
10188 format %{ "incl $dst\t# int" %}
10189 ins_encode %{
10190 __ incrementl($dst$$Address);
10191 %}
10192 ins_pipe(ialu_mem_imm);
10193 %}
10194
10195 // XXX why does that use AddI
10196 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10197 %{
10198 predicate(!UseAPX && UseIncDec);
10199 match(Set dst (AddI dst src));
10200 effect(KILL cr);
10201
10202 format %{ "decl $dst\t# int" %}
10203 ins_encode %{
10204 __ decrementl($dst$$Register);
10205 %}
10206 ins_pipe(ialu_reg);
10207 %}
10208
10209 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10210 %{
10211 predicate(UseAPX && UseIncDec);
10212 match(Set dst (AddI src val));
10213 effect(KILL cr);
10214 flag(PD::Flag_ndd_demotable_opr1);
10215
10216 format %{ "edecl $dst, $src\t# int ndd" %}
10217 ins_encode %{
10218 __ edecl($dst$$Register, $src$$Register, false);
10219 %}
10220 ins_pipe(ialu_reg);
10221 %}
10222
10223 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10224 %{
10225 predicate(UseAPX && UseIncDec);
10226 match(Set dst (AddI (LoadI src) val));
10227 effect(KILL cr);
10228
10229 format %{ "edecl $dst, $src\t# int ndd" %}
10230 ins_encode %{
10231 __ edecl($dst$$Register, $src$$Address, false);
10232 %}
10233 ins_pipe(ialu_reg);
10234 %}
10235
10236 // XXX why does that use AddI
10237 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10238 %{
10239 predicate(UseIncDec);
10240 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10241 effect(KILL cr);
10242
10243 ins_cost(125); // XXX
10244 format %{ "decl $dst\t# int" %}
10245 ins_encode %{
10246 __ decrementl($dst$$Address);
10247 %}
10248 ins_pipe(ialu_mem_imm);
10249 %}
10250
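// The lea forms below fold a shift and/or add into a single address
// computation, e.g. leal dst, [base + index << scale + disp] computes
// base + (index << scale) + disp in one instruction. Unlike addl/shll, lea does
// not touch the flags, which is why these rules have no KILL cr effect; the
// fast_2op/3op_lea predicates restrict them to CPUs where a two- or
// three-component lea is fast.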
10251 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10252 %{
10253 predicate(VM_Version::supports_fast_2op_lea());
10254 match(Set dst (AddI (LShiftI index scale) disp));
10255
10256 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10257 ins_encode %{
10258 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10259 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10260 %}
10261 ins_pipe(ialu_reg_reg);
10262 %}
10263
10264 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10265 %{
10266 predicate(VM_Version::supports_fast_3op_lea());
10267 match(Set dst (AddI (AddI base index) disp));
10268
10269 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10270 ins_encode %{
10271 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10272 %}
10273 ins_pipe(ialu_reg_reg);
10274 %}
10275
10276 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10277 %{
10278 predicate(VM_Version::supports_fast_2op_lea());
10279 match(Set dst (AddI base (LShiftI index scale)));
10280
10281 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10282 ins_encode %{
10283 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10284 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10285 %}
10286 ins_pipe(ialu_reg_reg);
10287 %}
10288
10289 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10290 %{
10291 predicate(VM_Version::supports_fast_3op_lea());
10292 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10293
10294 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10295 ins_encode %{
10296 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10297 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10298 %}
10299 ins_pipe(ialu_reg_reg);
10300 %}
10301
10302 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10303 %{
10304 predicate(!UseAPX);
10305 match(Set dst (AddL dst src));
10306 effect(KILL cr);
10307 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10308
10309 format %{ "addq $dst, $src\t# long" %}
10310 ins_encode %{
10311 __ addq($dst$$Register, $src$$Register);
10312 %}
10313 ins_pipe(ialu_reg_reg);
10314 %}
10315
10316 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10317 %{
10318 predicate(UseAPX);
10319 match(Set dst (AddL src1 src2));
10320 effect(KILL cr);
10321 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10322
10323 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10324 ins_encode %{
10325 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10326 %}
10327 ins_pipe(ialu_reg_reg);
10328 %}
10329
10330 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10331 %{
10332 predicate(!UseAPX);
10333 match(Set dst (AddL dst src));
10334 effect(KILL cr);
10335 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10336
10337 format %{ "addq $dst, $src\t# long" %}
10338 ins_encode %{
10339 __ addq($dst$$Register, $src$$constant);
10340 %}
10341 ins_pipe( ialu_reg );
10342 %}
10343
10344 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10345 %{
10346 predicate(UseAPX);
10347 match(Set dst (AddL src1 src2));
10348 effect(KILL cr);
10349 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10350
10351 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10352 ins_encode %{
10353 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10354 %}
10355 ins_pipe( ialu_reg );
10356 %}
10357
10358 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10359 %{
10360 predicate(UseAPX);
10361 match(Set dst (AddL (LoadL src1) src2));
10362 effect(KILL cr);
10363 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10364
10365 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10366 ins_encode %{
10367 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10368 %}
10369 ins_pipe( ialu_reg );
10370 %}
10371
10372 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10373 %{
10374 predicate(!UseAPX);
10375 match(Set dst (AddL dst (LoadL src)));
10376 effect(KILL cr);
10377 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10378
10379 ins_cost(150); // XXX
10380 format %{ "addq $dst, $src\t# long" %}
10381 ins_encode %{
10382 __ addq($dst$$Register, $src$$Address);
10383 %}
10384 ins_pipe(ialu_reg_mem);
10385 %}
10386
10387 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10388 %{
10389 predicate(UseAPX);
10390 match(Set dst (AddL src1 (LoadL src2)));
10391 effect(KILL cr);
10392 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10393
10394 ins_cost(150);
10395 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10396 ins_encode %{
10397 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10398 %}
10399 ins_pipe(ialu_reg_mem);
10400 %}
10401
10402 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10403 %{
10404 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10405 effect(KILL cr);
10406 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10407
10408 ins_cost(150); // XXX
10409 format %{ "addq $dst, $src\t# long" %}
10410 ins_encode %{
10411 __ addq($dst$$Address, $src$$Register);
10412 %}
10413 ins_pipe(ialu_mem_reg);
10414 %}
10415
10416 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10417 %{
10418 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10419 effect(KILL cr);
10420 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10421
10422 ins_cost(125); // XXX
10423 format %{ "addq $dst, $src\t# long" %}
10424 ins_encode %{
10425 __ addq($dst$$Address, $src$$constant);
10426 %}
10427 ins_pipe(ialu_mem_imm);
10428 %}
10429
10430 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10431 %{
10432 predicate(!UseAPX && UseIncDec);
10433 match(Set dst (AddL dst src));
10434 effect(KILL cr);
10435
10436 format %{ "incq $dst\t# long" %}
10437 ins_encode %{
10438 __ incrementq($dst$$Register);
10439 %}
10440 ins_pipe(ialu_reg);
10441 %}
10442
10443 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)

10444 %{
10445 predicate(UseAPX && UseIncDec);
10446 match(Set dst (AddL src val));
10447 effect(KILL cr);
10448 flag(PD::Flag_ndd_demotable_opr1);
10449
10450 format %{ "eincq $dst, $src\t# long ndd" %}
10451 ins_encode %{
10452 __ eincq($dst$$Register, $src$$Register, false);
10453 %}
10454 ins_pipe(ialu_reg);
10455 %}
10456
10457 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10458 %{
10459 predicate(UseAPX && UseIncDec);
10460 match(Set dst (AddL (LoadL src) val));
10461 effect(KILL cr);
10462
10463 format %{ "eincq $dst, $src\t# long ndd" %}
10464 ins_encode %{
10465 __ eincq($dst$$Register, $src$$Address, false);
10466 %}
10467 ins_pipe(ialu_reg);
10468 %}
10469
10470 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10471 %{
10472 predicate(UseIncDec);
10473 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10474 effect(KILL cr);
10475
10476 ins_cost(125); // XXX
10477 format %{ "incq $dst\t# long" %}
10478 ins_encode %{
10479 __ incrementq($dst$$Address);
10480 %}
10481 ins_pipe(ialu_mem_imm);
10482 %}
10483
10484 // XXX why does that use AddL
10485 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10486 %{
10487 predicate(!UseAPX && UseIncDec);
10488 match(Set dst (AddL dst src));
10489 effect(KILL cr);
10490
10491 format %{ "decq $dst\t# long" %}
10492 ins_encode %{
10493 __ decrementq($dst$$Register);
10494 %}
10495 ins_pipe(ialu_reg);
10496 %}
10497
10498 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10499 %{
10500 predicate(UseAPX && UseIncDec);
10501 match(Set dst (AddL src val));
10502 effect(KILL cr);
10503 flag(PD::Flag_ndd_demotable_opr1);
10504
10505 format %{ "edecq $dst, $src\t# long ndd" %}
10506 ins_encode %{
10507 __ edecq($dst$$Register, $src$$Register, false);
10508 %}
10509 ins_pipe(ialu_reg);
10510 %}
10511
10512 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10513 %{
10514 predicate(UseAPX && UseIncDec);
10515 match(Set dst (AddL (LoadL src) val));
10516 effect(KILL cr);
10517
10518 format %{ "edecq $dst, $src\t# long ndd" %}
10519 ins_encode %{
10520 __ edecq($dst$$Register, $src$$Address, false);
10521 %}
10522 ins_pipe(ialu_reg);
10523 %}
10524
10525 // XXX why does that use AddL
10526 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10527 %{
10528 predicate(UseIncDec);
10529 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10530 effect(KILL cr);
10531
10532 ins_cost(125); // XXX
10533 format %{ "decq $dst\t# long" %}
10534 ins_encode %{
10535 __ decrementq($dst$$Address);
10536 %}
10537 ins_pipe(ialu_mem_imm);
10538 %}
10539
10540 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10541 %{
10542 predicate(VM_Version::supports_fast_2op_lea());
10543 match(Set dst (AddL (LShiftL index scale) disp));
10544
10545 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10546 ins_encode %{
10547 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10548 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10549 %}
10550 ins_pipe(ialu_reg_reg);
10551 %}
10552
10553 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10554 %{
10555 predicate(VM_Version::supports_fast_3op_lea());
10556 match(Set dst (AddL (AddL base index) disp));
10557
10558 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10559 ins_encode %{
10560 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10561 %}
10562 ins_pipe(ialu_reg_reg);
10563 %}
10564
10565 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10566 %{
10567 predicate(VM_Version::supports_fast_2op_lea());
10568 match(Set dst (AddL base (LShiftL index scale)));
10569
10570 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10571 ins_encode %{
10572 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10573 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10574 %}
10575 ins_pipe(ialu_reg_reg);
10576 %}
10577
10578 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10579 %{
10580 predicate(VM_Version::supports_fast_3op_lea());
10581 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10582
10583 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10584 ins_encode %{
10585 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10586 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10587 %}
10588 ins_pipe(ialu_reg_reg);
10589 %}
10590
10591 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10592 %{
10593 match(Set dst (AddP dst src));
10594 effect(KILL cr);
10595 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10596
10597 format %{ "addq $dst, $src\t# ptr" %}
10598 ins_encode %{
10599 __ addq($dst$$Register, $src$$Register);
10600 %}
10601 ins_pipe(ialu_reg_reg);
10602 %}
10603
10604 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10605 %{
10606 match(Set dst (AddP dst src));
10607 effect(KILL cr);
10608 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10609
10610 format %{ "addq $dst, $src\t# ptr" %}
10611 ins_encode %{
10612 __ addq($dst$$Register, $src$$constant);
10613 %}
10614 ins_pipe( ialu_reg );
10615 %}
10616
10617 // XXX addP mem ops ????
10618
10619 instruct checkCastPP(rRegP dst)
10620 %{
10621 match(Set dst (CheckCastPP dst));
10622
10623 size(0);
10624 format %{ "# checkcastPP of $dst" %}
10625 ins_encode(/* empty encoding */);
10626 ins_pipe(empty);
10627 %}
10628
10629 instruct castPP(rRegP dst)
10630 %{
10631 match(Set dst (CastPP dst));
10632
10633 size(0);
10634 format %{ "# castPP of $dst" %}
10635 ins_encode(/* empty encoding */);
10636 ins_pipe(empty);
10637 %}
10638
10639 instruct castII(rRegI dst)
10640 %{
10641 predicate(VerifyConstraintCasts == 0);
10642 match(Set dst (CastII dst));
10643
10644 size(0);
10645 format %{ "# castII of $dst" %}
10646 ins_encode(/* empty encoding */);
10647 ins_cost(0);
10648 ins_pipe(empty);
10649 %}
10650
10651 instruct castII_checked(rRegI dst, rFlagsReg cr)
10652 %{
10653 predicate(VerifyConstraintCasts > 0);
10654 match(Set dst (CastII dst));
10655
10656 effect(KILL cr);
10657 format %{ "# cast_checked_II $dst" %}
10658 ins_encode %{
10659 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10660 %}
10661 ins_pipe(pipe_slow);
10662 %}
10663
10664 instruct castLL(rRegL dst)
10665 %{
10666 predicate(VerifyConstraintCasts == 0);
10667 match(Set dst (CastLL dst));
10668
10669 size(0);
10670 format %{ "# castLL of $dst" %}
10671 ins_encode(/* empty encoding */);
10672 ins_cost(0);
10673 ins_pipe(empty);
10674 %}
10675
10676 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10677 %{
10678 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10679 match(Set dst (CastLL dst));
10680
10681 effect(KILL cr);
10682 format %{ "# cast_checked_LL $dst" %}
10683 ins_encode %{
10684 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10685 %}
10686 ins_pipe(pipe_slow);
10687 %}
10688
10689 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10690 %{
10691 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10692 match(Set dst (CastLL dst));
10693
10694 effect(KILL cr, TEMP tmp);
10695 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10696 ins_encode %{
10697 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10698 %}
10699 ins_pipe(pipe_slow);
10700 %}
10701
10702 instruct castFF(regF dst)
10703 %{
10704 match(Set dst (CastFF dst));
10705
10706 size(0);
10707 format %{ "# castFF of $dst" %}
10708 ins_encode(/* empty encoding */);
10709 ins_cost(0);
10710 ins_pipe(empty);
10711 %}
10712
10713 instruct castHH(regF dst)
10714 %{
10715 match(Set dst (CastHH dst));
10716
10717 size(0);
10718 format %{ "# castHH of $dst" %}
10719 ins_encode(/* empty encoding */);
10720 ins_cost(0);
10721 ins_pipe(empty);
10722 %}
10723
10724 instruct castDD(regD dst)
10725 %{
10726 match(Set dst (CastDD dst));
10727
10728 size(0);
10729 format %{ "# castDD of $dst" %}
10730 ins_encode(/* empty encoding */);
10731 ins_cost(0);
10732 ins_pipe(empty);
10733 %}
10734
10735 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
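// The CompareAndSwap* patterns bind the expected value to rax (rax_Reg*),
// which lock cmpxchg implicitly compares and may overwrite (hence KILL
// oldval), and then materialize the boolean result from the equal condition
// via setcc.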
10736 instruct compareAndSwapP(rRegI res,
10737 memory mem_ptr,
10738 rax_RegP oldval, rRegP newval,
10739 rFlagsReg cr)
10740 %{
10741 predicate(n->as_LoadStore()->barrier_data() == 0);
10742 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10743 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10744 effect(KILL cr, KILL oldval);
10745
10746 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10747 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10748 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10749 ins_encode %{
10750 __ lock();
10751 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10752 __ setcc(Assembler::equal, $res$$Register);
10753 %}
10754 ins_pipe( pipe_cmpxchg );
10755 %}
10756
10757 instruct compareAndSwapL(rRegI res,
10758 memory mem_ptr,
10759 rax_RegL oldval, rRegL newval,
10760 rFlagsReg cr)
10761 %{
10762 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10763 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10764 effect(KILL cr, KILL oldval);
10765
10766 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10767 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10768 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10769 ins_encode %{
10770 __ lock();
10771 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10772 __ setcc(Assembler::equal, $res$$Register);
10773 %}
10774 ins_pipe( pipe_cmpxchg );
10775 %}
10776
10777 instruct compareAndSwapI(rRegI res,
10778 memory mem_ptr,
10779 rax_RegI oldval, rRegI newval,
10780 rFlagsReg cr)
10781 %{
10782 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10783 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10784 effect(KILL cr, KILL oldval);
10785
10786 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10787 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10788 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10789 ins_encode %{
10790 __ lock();
10791 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10792 __ setcc(Assembler::equal, $res$$Register);
10793 %}
10794 ins_pipe( pipe_cmpxchg );
10795 %}
10796
10797 instruct compareAndSwapB(rRegI res,
10798 memory mem_ptr,
10799 rax_RegI oldval, rRegI newval,
10800 rFlagsReg cr)
10801 %{
10802 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10803 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10804 effect(KILL cr, KILL oldval);
10805
10806 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10807 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10808 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10809 ins_encode %{
10810 __ lock();
10811 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10812 __ setcc(Assembler::equal, $res$$Register);
10813 %}
10814 ins_pipe( pipe_cmpxchg );
10815 %}
10816
10817 instruct compareAndSwapS(rRegI res,
10818 memory mem_ptr,
10819 rax_RegI oldval, rRegI newval,
10820 rFlagsReg cr)
10821 %{
10822 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10823 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10824 effect(KILL cr, KILL oldval);
10825
10826 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10827 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10828 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10829 ins_encode %{
10830 __ lock();
10831 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10832 __ setcc(Assembler::equal, $res$$Register);
10833 %}
10834 ins_pipe( pipe_cmpxchg );
10835 %}
10836
10837 instruct compareAndSwapN(rRegI res,
10838 memory mem_ptr,
10839 rax_RegN oldval, rRegN newval,
10840 rFlagsReg cr) %{
10841 predicate(n->as_LoadStore()->barrier_data() == 0);
10842 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10843 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10844 effect(KILL cr, KILL oldval);
10845
10846 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10847 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10848 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10849 ins_encode %{
10850 __ lock();
10851 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10852 __ setcc(Assembler::equal, $res$$Register);
10853 %}
10854 ins_pipe( pipe_cmpxchg );
10855 %}
10856
10857 instruct compareAndExchangeB(
10858 memory mem_ptr,
10859 rax_RegI oldval, rRegI newval,
10860 rFlagsReg cr)
10861 %{
10862 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10863 effect(KILL cr);
10864
10865 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10866 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10867 ins_encode %{
10868 __ lock();
10869 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10870 %}
10871 ins_pipe( pipe_cmpxchg );
10872 %}
10873
10874 instruct compareAndExchangeS(
10875 memory mem_ptr,
10876 rax_RegI oldval, rRegI newval,
10877 rFlagsReg cr)
10878 %{
10879 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10880 effect(KILL cr);
10881
10882 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10883 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10884 ins_encode %{
10885 __ lock();
10886 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10887 %}
10888 ins_pipe( pipe_cmpxchg );
10889 %}
10890
10891 instruct compareAndExchangeI(
10892 memory mem_ptr,
10893 rax_RegI oldval, rRegI newval,
10894 rFlagsReg cr)
10895 %{
10896 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10897 effect(KILL cr);
10898
10899 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10900 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10901 ins_encode %{
10902 __ lock();
10903 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10904 %}
10905 ins_pipe( pipe_cmpxchg );
10906 %}
10907
10908 instruct compareAndExchangeL(
10909 memory mem_ptr,
10910 rax_RegL oldval, rRegL newval,
10911 rFlagsReg cr)
10912 %{
10913 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10914 effect(KILL cr);
10915
10916 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10917 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10918 ins_encode %{
10919 __ lock();
10920 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10921 %}
10922 ins_pipe( pipe_cmpxchg );
10923 %}
10924
10925 instruct compareAndExchangeN(
10926 memory mem_ptr,
10927 rax_RegN oldval, rRegN newval,
10928 rFlagsReg cr) %{
10929 predicate(n->as_LoadStore()->barrier_data() == 0);
10930 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10931 effect(KILL cr);
10932
10933 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10934 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10935 ins_encode %{
10936 __ lock();
10937 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10938 %}
10939 ins_pipe( pipe_cmpxchg );
10940 %}
10941
10942 instruct compareAndExchangeP(
10943 memory mem_ptr,
10944 rax_RegP oldval, rRegP newval,
10945 rFlagsReg cr)
10946 %{
10947 predicate(n->as_LoadStore()->barrier_data() == 0);
10948 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10949 effect(KILL cr);
10950
10951 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10952 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10953 ins_encode %{
10954 __ lock();
10955 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10956 %}
10957 ins_pipe( pipe_cmpxchg );
10958 %}
10959
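// GetAndAdd*: when the fetched result is unused (result_not_used()), a plain
// locked add suffices; otherwise lock xadd is used so the old value is
// returned in the register operand.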
10960 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10961 predicate(n->as_LoadStore()->result_not_used());
10962 match(Set dummy (GetAndAddB mem add));
10963 effect(KILL cr);
10964 format %{ "addb_lock $mem, $add" %}
10965 ins_encode %{
10966 __ lock();
10967 __ addb($mem$$Address, $add$$Register);
10968 %}
10969 ins_pipe(pipe_cmpxchg);
10970 %}
10971
10972 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10973 predicate(n->as_LoadStore()->result_not_used());
10974 match(Set dummy (GetAndAddB mem add));
10975 effect(KILL cr);
10976 format %{ "addb_lock $mem, $add" %}
10977 ins_encode %{
10978 __ lock();
10979 __ addb($mem$$Address, $add$$constant);
10980 %}
10981 ins_pipe(pipe_cmpxchg);
10982 %}
10983
10984 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10985 predicate(!n->as_LoadStore()->result_not_used());
10986 match(Set newval (GetAndAddB mem newval));
10987 effect(KILL cr);
10988 format %{ "xaddb_lock $mem, $newval" %}
10989 ins_encode %{
10990 __ lock();
10991 __ xaddb($mem$$Address, $newval$$Register);
10992 %}
10993 ins_pipe(pipe_cmpxchg);
10994 %}
10995
10996 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10997 predicate(n->as_LoadStore()->result_not_used());
10998 match(Set dummy (GetAndAddS mem add));
10999 effect(KILL cr);
11000 format %{ "addw_lock $mem, $add" %}
11001 ins_encode %{
11002 __ lock();
11003 __ addw($mem$$Address, $add$$Register);
11004 %}
11005 ins_pipe(pipe_cmpxchg);
11006 %}
11007
11008 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11009 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
11010 match(Set dummy (GetAndAddS mem add));
11011 effect(KILL cr);
11012 format %{ "addw_lock $mem, $add" %}
11013 ins_encode %{
11014 __ lock();
11015 __ addw($mem$$Address, $add$$constant);
11016 %}
11017 ins_pipe(pipe_cmpxchg);
11018 %}
11019
11020 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
11021 predicate(!n->as_LoadStore()->result_not_used());
11022 match(Set newval (GetAndAddS mem newval));
11023 effect(KILL cr);
11024 format %{ "xaddw_lock $mem, $newval" %}
11025 ins_encode %{
11026 __ lock();
11027 __ xaddw($mem$$Address, $newval$$Register);
11028 %}
11029 ins_pipe(pipe_cmpxchg);
11030 %}
11031
11032 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
11033 predicate(n->as_LoadStore()->result_not_used());
11034 match(Set dummy (GetAndAddI mem add));
11035 effect(KILL cr);
11036 format %{ "addl_lock $mem, $add" %}
11037 ins_encode %{
11038 __ lock();
11039 __ addl($mem$$Address, $add$$Register);
11040 %}
11041 ins_pipe(pipe_cmpxchg);
11042 %}
11043
11044 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11045 predicate(n->as_LoadStore()->result_not_used());
11046 match(Set dummy (GetAndAddI mem add));
11047 effect(KILL cr);
11048 format %{ "addl_lock $mem, $add" %}
11049 ins_encode %{
11050 __ lock();
11051 __ addl($mem$$Address, $add$$constant);
11052 %}
11053 ins_pipe(pipe_cmpxchg);
11054 %}
11055
11056 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
11057 predicate(!n->as_LoadStore()->result_not_used());
11058 match(Set newval (GetAndAddI mem newval));
11059 effect(KILL cr);
11060 format %{ "xaddl_lock $mem, $newval" %}
11061 ins_encode %{
11062 __ lock();
11063 __ xaddl($mem$$Address, $newval$$Register);
11064 %}
11065 ins_pipe(pipe_cmpxchg);
11066 %}
11067
11068 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11069 predicate(n->as_LoadStore()->result_not_used());
11070 match(Set dummy (GetAndAddL mem add));
11071 effect(KILL cr);
11072 format %{ "addq_lock $mem, $add" %}
11073 ins_encode %{
11074 __ lock();
11075 __ addq($mem$$Address, $add$$Register);
11076 %}
11077 ins_pipe(pipe_cmpxchg);
11078 %}
11079
11080 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11081 predicate(n->as_LoadStore()->result_not_used());
11082 match(Set dummy (GetAndAddL mem add));
11083 effect(KILL cr);
11084 format %{ "addq_lock $mem, $add" %}
11085 ins_encode %{
11086 __ lock();
11087 __ addq($mem$$Address, $add$$constant);
11088 %}
11089 ins_pipe(pipe_cmpxchg);
11090 %}
11091
11092 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11093 predicate(!n->as_LoadStore()->result_not_used());
11094 match(Set newval (GetAndAddL mem newval));
11095 effect(KILL cr);
11096 format %{ "xaddq_lock $mem, $newval" %}
11097 ins_encode %{
11098 __ lock();
11099 __ xaddq($mem$$Address, $newval$$Register);
11100 %}
11101 ins_pipe(pipe_cmpxchg);
11102 %}
11103
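// GetAndSet* lowers to xchg with a memory operand, which is implicitly
// locked and does not modify the flags, so no lock() prefix and no KILL cr
// effect is needed.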
11104 instruct xchgB( memory mem, rRegI newval) %{
11105 match(Set newval (GetAndSetB mem newval));
11106 format %{ "XCHGB $newval,[$mem]" %}
11107 ins_encode %{
11108 __ xchgb($newval$$Register, $mem$$Address);
11109 %}
11110 ins_pipe( pipe_cmpxchg );
11111 %}
11112
11113 instruct xchgS( memory mem, rRegI newval) %{
11114 match(Set newval (GetAndSetS mem newval));
11115 format %{ "XCHGW $newval,[$mem]" %}
11116 ins_encode %{
11117 __ xchgw($newval$$Register, $mem$$Address);
11118 %}
11119 ins_pipe( pipe_cmpxchg );
11120 %}
11121
11122 instruct xchgI( memory mem, rRegI newval) %{
11123 match(Set newval (GetAndSetI mem newval));
11124 format %{ "XCHGL $newval,[$mem]" %}
11125 ins_encode %{
11126 __ xchgl($newval$$Register, $mem$$Address);
11127 %}
11128 ins_pipe( pipe_cmpxchg );
11129 %}
11130
11131 instruct xchgL( memory mem, rRegL newval) %{
11132 match(Set newval (GetAndSetL mem newval));
11133 format %{ "XCHGQ $newval,[$mem]" %}
11134 ins_encode %{
11135 __ xchgq($newval$$Register, $mem$$Address);
11136 %}
11137 ins_pipe( pipe_cmpxchg );
11138 %}
11139
11140 instruct xchgP( memory mem, rRegP newval) %{
11141 match(Set newval (GetAndSetP mem newval));
11142 predicate(n->as_LoadStore()->barrier_data() == 0);
11143 format %{ "XCHGQ $newval,[$mem]" %}
11144 ins_encode %{
11145 __ xchgq($newval$$Register, $mem$$Address);
11146 %}
11147 ins_pipe( pipe_cmpxchg );
11148 %}
11149
11150 instruct xchgN( memory mem, rRegN newval) %{
11151 predicate(n->as_LoadStore()->barrier_data() == 0);
11152 match(Set newval (GetAndSetN mem newval));
11153 format %{ "XCHGL $newval,[$mem]" %}
11154 ins_encode %{
11155 __ xchgl($newval$$Register, $mem$$Address);
11156 %}
11157 ins_pipe( pipe_cmpxchg );
11158 %}
11159
11160 //----------Abs Instructions-------------------------------------------
11161
11162 // Integer Absolute Instructions
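// Branchless abs: dst = 0 - src, then a cmov on 'less' (the subtraction
// produced a negative result, i.e. src was positive) replaces dst with src,
// leaving |src| in dst.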
11163 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11164 %{
11165 match(Set dst (AbsI src));
11166 effect(TEMP dst, KILL cr);
11167 format %{ "xorl $dst, $dst\t# abs int\n\t"
11168 "subl $dst, $src\n\t"
11169 "cmovll $dst, $src" %}
11170 ins_encode %{
11171 __ xorl($dst$$Register, $dst$$Register);
11172 __ subl($dst$$Register, $src$$Register);
11173 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11174 %}
11175
11176 ins_pipe(ialu_reg_reg);
11177 %}
11178
11179 // Long Absolute Instructions
11180 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11181 %{
11182 match(Set dst (AbsL src));
11183 effect(TEMP dst, KILL cr);
11184 format %{ "xorl $dst, $dst\t# abs long\n\t"
11185 "subq $dst, $src\n\t"
11186 "cmovlq $dst, $src" %}
11187 ins_encode %{
11188 __ xorl($dst$$Register, $dst$$Register);
11189 __ subq($dst$$Register, $src$$Register);
11190 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11191 %}
11192
11193 ins_pipe(ialu_reg_reg);
11194 %}
11195
11196 //----------Subtraction Instructions-------------------------------------------
11197
11198 // Integer Subtraction Instructions
11199 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11200 %{
11201 predicate(!UseAPX);
11202 match(Set dst (SubI dst src));
11203 effect(KILL cr);
11204 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11205
11206 format %{ "subl $dst, $src\t# int" %}
11207 ins_encode %{
11208 __ subl($dst$$Register, $src$$Register);
11209 %}
11210 ins_pipe(ialu_reg_reg);
11211 %}
11212
11213 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11214 %{
11215 predicate(UseAPX);
11216 match(Set dst (SubI src1 src2));
11217 effect(KILL cr);
11218 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11219
11220 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11221 ins_encode %{
11222 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11223 %}
11224 ins_pipe(ialu_reg_reg);
11225 %}
11226
11227 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11228 %{
11229 predicate(UseAPX);
11230 match(Set dst (SubI src1 src2));
11231 effect(KILL cr);
11232 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11233
11234 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11235 ins_encode %{
11236 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11237 %}
11238 ins_pipe(ialu_reg_reg);
11239 %}
11240
11241 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11242 %{
11243 predicate(UseAPX);
11244 match(Set dst (SubI (LoadI src1) src2));
11245 effect(KILL cr);
11246 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11247
11248 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11249 ins_encode %{
11250 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11251 %}
11252 ins_pipe(ialu_reg_reg);
11253 %}
11254
11255 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11256 %{
11257 predicate(!UseAPX);
11258 match(Set dst (SubI dst (LoadI src)));
11259 effect(KILL cr);
11260 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11261
11262 ins_cost(150);
11263 format %{ "subl $dst, $src\t# int" %}
11264 ins_encode %{
11265 __ subl($dst$$Register, $src$$Address);
11266 %}
11267 ins_pipe(ialu_reg_mem);
11268 %}
11269
11270 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11271 %{
11272 predicate(UseAPX);
11273 match(Set dst (SubI src1 (LoadI src2)));
11274 effect(KILL cr);
11275 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11276
11277 ins_cost(150);
11278 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11279 ins_encode %{
11280 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11281 %}
11282 ins_pipe(ialu_reg_mem);
11283 %}
11284
11285 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11286 %{
11287 predicate(UseAPX);
11288 match(Set dst (SubI (LoadI src1) src2));
11289 effect(KILL cr);
11290 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11291
11292 ins_cost(150);
11293 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11294 ins_encode %{
11295 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11296 %}
11297 ins_pipe(ialu_reg_mem);
11298 %}
11299
11300 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11301 %{
11302 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11303 effect(KILL cr);
11304 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11305
11306 ins_cost(150);
11307 format %{ "subl $dst, $src\t# int" %}
11308 ins_encode %{
11309 __ subl($dst$$Address, $src$$Register);
11310 %}
11311 ins_pipe(ialu_mem_reg);
11312 %}
11313
11314 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11315 %{
11316 predicate(!UseAPX);
11317 match(Set dst (SubL dst src));
11318 effect(KILL cr);
11319 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11320
11321 format %{ "subq $dst, $src\t# long" %}
11322 ins_encode %{
11323 __ subq($dst$$Register, $src$$Register);
11324 %}
11325 ins_pipe(ialu_reg_reg);
11326 %}
11327
11328 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11329 %{
11330 predicate(UseAPX);
11331 match(Set dst (SubL src1 src2));
11332 effect(KILL cr);
11333 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11334
11335 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11336 ins_encode %{
11337 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11338 %}
11339 ins_pipe(ialu_reg_reg);
11340 %}
11341
11342 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11343 %{
11344 predicate(UseAPX);
11345 match(Set dst (SubL src1 src2));
11346 effect(KILL cr);
11347 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11348
11349 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11350 ins_encode %{
11351 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11352 %}
11353 ins_pipe(ialu_reg_reg);
11354 %}
11355
11356 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11357 %{
11358 predicate(UseAPX);
11359 match(Set dst (SubL (LoadL src1) src2));
11360 effect(KILL cr);
11361 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11362
11363 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11364 ins_encode %{
11365 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11366 %}
11367 ins_pipe(ialu_reg_reg);
11368 %}
11369
11370 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11371 %{
11372 predicate(!UseAPX);
11373 match(Set dst (SubL dst (LoadL src)));
11374 effect(KILL cr);
11375 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11376
11377 ins_cost(150);
11378 format %{ "subq $dst, $src\t# long" %}
11379 ins_encode %{
11380 __ subq($dst$$Register, $src$$Address);
11381 %}
11382 ins_pipe(ialu_reg_mem);
11383 %}
11384
11385 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11386 %{
11387 predicate(UseAPX);
11388 match(Set dst (SubL src1 (LoadL src2)));
11389 effect(KILL cr);
11390 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11391
11392 ins_cost(150);
11393 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11394 ins_encode %{
11395 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11396 %}
11397 ins_pipe(ialu_reg_mem);
11398 %}
11399
11400 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11401 %{
11402 predicate(UseAPX);
11403 match(Set dst (SubL (LoadL src1) src2));
11404 effect(KILL cr);
11405 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11406
11407 ins_cost(150);
11408 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11409 ins_encode %{
11410 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11411 %}
11412 ins_pipe(ialu_reg_mem);
11413 %}
11414
11415 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11416 %{
11417 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11418 effect(KILL cr);
11419 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11420
11421 ins_cost(150);
11422 format %{ "subq $dst, $src\t# long" %}
11423 ins_encode %{
11424 __ subq($dst$$Address, $src$$Register);
11425 %}
11426 ins_pipe(ialu_mem_reg);
11427 %}
11428
11429 // Subtract from a pointer
11430 // XXX hmpf???
11431 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11432 %{
11433 match(Set dst (AddP dst (SubI zero src)));
11434 effect(KILL cr);
11435
11436 format %{ "subq $dst, $src\t# ptr - int" %}
11437 ins_encode %{
11438 __ subq($dst$$Register, $src$$Register);
11439 %}
11440 ins_pipe(ialu_reg_reg);
11441 %}
11442
11443 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11444 %{
11445 predicate(!UseAPX);
11446 match(Set dst (SubI zero dst));
11447 effect(KILL cr);
11448 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11449
11450 format %{ "negl $dst\t# int" %}
11451 ins_encode %{
11452 __ negl($dst$$Register);
11453 %}
11454 ins_pipe(ialu_reg);
11455 %}
11456
11457 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11458 %{
11459 predicate(UseAPX);
11460 match(Set dst (SubI zero src));
11461 effect(KILL cr);
11462 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11463
11464 format %{ "enegl $dst, $src\t# int ndd" %}
11465 ins_encode %{
11466 __ enegl($dst$$Register, $src$$Register, false);
11467 %}
11468 ins_pipe(ialu_reg);
11469 %}
11470
11471 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11472 %{
11473 predicate(!UseAPX);
11474 match(Set dst (NegI dst));
11475 effect(KILL cr);
11476 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11477
11478 format %{ "negl $dst\t# int" %}
11479 ins_encode %{
11480 __ negl($dst$$Register);
11481 %}
11482 ins_pipe(ialu_reg);
11483 %}
11484
11485 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11486 %{
11487 predicate(UseAPX);
11488 match(Set dst (NegI src));
11489 effect(KILL cr);
11490 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11491
11492 format %{ "enegl $dst, $src\t# int ndd" %}
11493 ins_encode %{
11494 __ enegl($dst$$Register, $src$$Register, false);
11495 %}
11496 ins_pipe(ialu_reg);
11497 %}
11498
11499 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11500 %{
11501 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11502 effect(KILL cr);
11503 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11504
11505 format %{ "negl $dst\t# int" %}
11506 ins_encode %{
11507 __ negl($dst$$Address);
11508 %}
11509 ins_pipe(ialu_reg);
11510 %}
11511
11512 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11513 %{
11514 predicate(!UseAPX);
11515 match(Set dst (SubL zero dst));
11516 effect(KILL cr);
11517 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11518
11519 format %{ "negq $dst\t# long" %}
11520 ins_encode %{
11521 __ negq($dst$$Register);
11522 %}
11523 ins_pipe(ialu_reg);
11524 %}
11525
11526 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11527 %{
11528 predicate(UseAPX);
11529 match(Set dst (SubL zero src));
11530 effect(KILL cr);
11531 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11532
11533 format %{ "enegq $dst, $src\t# long ndd" %}
11534 ins_encode %{
11535 __ enegq($dst$$Register, $src$$Register, false);
11536 %}
11537 ins_pipe(ialu_reg);
11538 %}
11539
11540 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11541 %{
11542 predicate(!UseAPX);
11543 match(Set dst (NegL dst));
11544 effect(KILL cr);
11545 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11546
11547 format %{ "negq $dst\t# long" %}
11548 ins_encode %{
11549 __ negq($dst$$Register);
11550 %}
11551 ins_pipe(ialu_reg);
11552 %}
11553
11554 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11555 %{
11556 predicate(UseAPX);
11557 match(Set dst (NegL src));
11558 effect(KILL cr);
11559 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11560
11561 format %{ "enegq $dst, $src\t# long ndd" %}
11562 ins_encode %{
11563 __ enegq($dst$$Register, $src$$Register, false);
11564 %}
11565 ins_pipe(ialu_reg);
11566 %}
11567
11568 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11569 %{
11570 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11571 effect(KILL cr);
11572 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11573
11574 format %{ "negq $dst\t# long" %}
11575 ins_encode %{
11576 __ negq($dst$$Address);
11577 %}
11578 ins_pipe(ialu_reg);
11579 %}
11580
11581 //----------Multiplication/Division Instructions-------------------------------
11582 // Integer Multiplication Instructions
11583 // Multiply Register
11584
11585 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11586 %{
11587 predicate(!UseAPX);
11588 match(Set dst (MulI dst src));
11589 effect(KILL cr);
11590
11591 ins_cost(300);
11592 format %{ "imull $dst, $src\t# int" %}
11593 ins_encode %{
11594 __ imull($dst$$Register, $src$$Register);
11595 %}
11596 ins_pipe(ialu_reg_reg_alu0);
11597 %}
11598
11599 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11600 %{
11601 predicate(UseAPX);
11602 match(Set dst (MulI src1 src2));
11603 effect(KILL cr);
11604 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11605
11606 ins_cost(300);
11607 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11608 ins_encode %{
11609 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11610 %}
11611 ins_pipe(ialu_reg_reg_alu0);
11612 %}
11613
11614 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11615 %{
11616 match(Set dst (MulI src imm));
11617 effect(KILL cr);
11618
11619 ins_cost(300);
11620 format %{ "imull $dst, $src, $imm\t# int" %}
11621 ins_encode %{
11622 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11623 %}
11624 ins_pipe(ialu_reg_reg_alu0);
11625 %}
11626
11627 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11628 %{
11629 predicate(!UseAPX);
11630 match(Set dst (MulI dst (LoadI src)));
11631 effect(KILL cr);
11632
11633 ins_cost(350);
11634 format %{ "imull $dst, $src\t# int" %}
11635 ins_encode %{
11636 __ imull($dst$$Register, $src$$Address);
11637 %}
11638 ins_pipe(ialu_reg_mem_alu0);
11639 %}
11640
11641 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11642 %{
11643 predicate(UseAPX);
11644 match(Set dst (MulI src1 (LoadI src2)));
11645 effect(KILL cr);
11646 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11647
11648 ins_cost(350);
11649 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11650 ins_encode %{
11651 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11652 %}
11653 ins_pipe(ialu_reg_mem_alu0);
11654 %}
11655
11656 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11657 %{
11658 match(Set dst (MulI (LoadI src) imm));
11659 effect(KILL cr);
11660
11661 ins_cost(300);
11662 format %{ "imull $dst, $src, $imm\t# int" %}
11663 ins_encode %{
11664 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11665 %}
11666 ins_pipe(ialu_reg_mem_alu0);
11667 %}
11668
11669 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11670 %{
11671 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11672 effect(KILL cr, KILL src2);
11673
11674 expand %{ mulI_rReg(dst, src1, cr);
11675 mulI_rReg(src2, src3, cr);
11676 addI_rReg(dst, src2, cr); %}
11677 %}
11678
11679 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11680 %{
11681 predicate(!UseAPX);
11682 match(Set dst (MulL dst src));
11683 effect(KILL cr);
11684
11685 ins_cost(300);
11686 format %{ "imulq $dst, $src\t# long" %}
11687 ins_encode %{
11688 __ imulq($dst$$Register, $src$$Register);
11689 %}
11690 ins_pipe(ialu_reg_reg_alu0);
11691 %}
11692
11693 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11694 %{
11695 predicate(UseAPX);
11696 match(Set dst (MulL src1 src2));
11697 effect(KILL cr);
11698 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11699
11700 ins_cost(300);
11701 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11702 ins_encode %{
11703 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11704 %}
11705 ins_pipe(ialu_reg_reg_alu0);
11706 %}
11707
11708 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11709 %{
11710 match(Set dst (MulL src imm));
11711 effect(KILL cr);
11712
11713 ins_cost(300);
11714 format %{ "imulq $dst, $src, $imm\t# long" %}
11715 ins_encode %{
11716 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11717 %}
11718 ins_pipe(ialu_reg_reg_alu0);
11719 %}
11720
11721 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11722 %{
11723 predicate(!UseAPX);
11724 match(Set dst (MulL dst (LoadL src)));
11725 effect(KILL cr);
11726
11727 ins_cost(350);
11728 format %{ "imulq $dst, $src\t# long" %}
11729 ins_encode %{
11730 __ imulq($dst$$Register, $src$$Address);
11731 %}
11732 ins_pipe(ialu_reg_mem_alu0);
11733 %}
11734
11735 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11736 %{
11737 predicate(UseAPX);
11738 match(Set dst (MulL src1 (LoadL src2)));
11739 effect(KILL cr);
11740 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11741
11742 ins_cost(350);
11743 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11744 ins_encode %{
11745 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11746 %}
11747 ins_pipe(ialu_reg_mem_alu0);
11748 %}
11749
11750 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11751 %{
11752 match(Set dst (MulL (LoadL src) imm));
11753 effect(KILL cr);
11754
11755 ins_cost(300);
11756 format %{ "imulq $dst, $src, $imm\t# long" %}
11757 ins_encode %{
11758 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11759 %}
11760 ins_pipe(ialu_reg_mem_alu0);
11761 %}
11762
11763 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11764 %{
11765 match(Set dst (MulHiL src rax));
11766 effect(USE_KILL rax, KILL cr);
11767
11768 ins_cost(300);
11769 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11770 ins_encode %{
11771 __ imulq($src$$Register);
11772 %}
11773 ins_pipe(ialu_reg_reg_alu0);
11774 %}
11775
11776 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11777 %{
11778 match(Set dst (UMulHiL src rax));
11779 effect(USE_KILL rax, KILL cr);
11780
11781 ins_cost(300);
11782 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11783 ins_encode %{
11784 __ mulq($src$$Register);
11785 %}
11786 ins_pipe(ialu_reg_reg_alu0);
11787 %}
11788
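// Signed divide/modulo must special-case min_int / -1 (min_long / -1 for
// longs), which idiv would fault on: the cdql_enc/cdqq_enc encodings compare
// the dividend against 0x80000000 (0x8000000000000000) and the divisor
// against -1 and skip the divide in that case, leaving quotient = dividend
// and remainder = 0 as Java semantics require; see the format strings below.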
11789 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11790 rFlagsReg cr)
11791 %{
11792 match(Set rax (DivI rax div));
11793 effect(KILL rdx, KILL cr);
11794
11795 ins_cost(30*100+10*100); // XXX
11796 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11797 "jne,s normal\n\t"
11798 "xorl rdx, rdx\n\t"
11799 "cmpl $div, -1\n\t"
11800 "je,s done\n"
11801 "normal: cdql\n\t"
11802 "idivl $div\n"
11803 "done:" %}
11804 ins_encode(cdql_enc(div));
11805 ins_pipe(ialu_reg_reg_alu0);
11806 %}
11807
11808 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11809 rFlagsReg cr)
11810 %{
11811 match(Set rax (DivL rax div));
11812 effect(KILL rdx, KILL cr);
11813
11814 ins_cost(30*100+10*100); // XXX
11815 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11816 "cmpq rax, rdx\n\t"
11817 "jne,s normal\n\t"
11818 "xorl rdx, rdx\n\t"
11819 "cmpq $div, -1\n\t"
11820 "je,s done\n"
11821 "normal: cdqq\n\t"
11822 "idivq $div\n"
11823 "done:" %}
11824 ins_encode(cdqq_enc(div));
11825 ins_pipe(ialu_reg_reg_alu0);
11826 %}
11827
11828 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11829 %{
11830 match(Set rax (UDivI rax div));
11831 effect(KILL rdx, KILL cr);
11832
11833 ins_cost(300);
11834 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11835 ins_encode %{
11836 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11837 %}
11838 ins_pipe(ialu_reg_reg_alu0);
11839 %}
11840
11841 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11842 %{
11843 match(Set rax (UDivL rax div));
11844 effect(KILL rdx, KILL cr);
11845
11846 ins_cost(300);
11847 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11848 ins_encode %{
11849 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11850 %}
11851 ins_pipe(ialu_reg_reg_alu0);
11852 %}
11853
11854 // Integer DIVMOD with Register, both quotient and mod results
11855 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11856 rFlagsReg cr)
11857 %{
11858 match(DivModI rax div);
11859 effect(KILL cr);
11860
11861 ins_cost(30*100+10*100); // XXX
11862 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11863 "jne,s normal\n\t"
11864 "xorl rdx, rdx\n\t"
11865 "cmpl $div, -1\n\t"
11866 "je,s done\n"
11867 "normal: cdql\n\t"
11868 "idivl $div\n"
11869 "done:" %}
11870 ins_encode(cdql_enc(div));
11871 ins_pipe(pipe_slow);
11872 %}
11873
11874 // Long DIVMOD with Register, both quotient and mod results
11875 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11876 rFlagsReg cr)
11877 %{
11878 match(DivModL rax div);
11879 effect(KILL cr);
11880
11881 ins_cost(30*100+10*100); // XXX
11882 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11883 "cmpq rax, rdx\n\t"
11884 "jne,s normal\n\t"
11885 "xorl rdx, rdx\n\t"
11886 "cmpq $div, -1\n\t"
11887 "je,s done\n"
11888 "normal: cdqq\n\t"
11889 "idivq $div\n"
11890 "done:" %}
11891 ins_encode(cdqq_enc(div));
11892 ins_pipe(pipe_slow);
11893 %}
11894
11895 // Unsigned integer DIVMOD with Register, both quotient and mod results
11896 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11897 no_rax_rdx_RegI div, rFlagsReg cr)
11898 %{
11899 match(UDivModI rax div);
11900 effect(TEMP tmp, KILL cr);
11901
11902 ins_cost(300);
11903 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11904 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11905 %}
11906 ins_encode %{
11907 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11908 %}
11909 ins_pipe(pipe_slow);
11910 %}
11911
11912 // Unsigned long DIVMOD with Register, both quotient and mod results
11913 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11914 no_rax_rdx_RegL div, rFlagsReg cr)
11915 %{
11916 match(UDivModL rax div);
11917 effect(TEMP tmp, KILL cr);
11918
11919 ins_cost(300);
11920 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11921 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11922 %}
11923 ins_encode %{
11924 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11925 %}
11926 ins_pipe(pipe_slow);
11927 %}
11928
11929 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11930 rFlagsReg cr)
11931 %{
11932 match(Set rdx (ModI rax div));
11933 effect(KILL rax, KILL cr);
11934
11935 ins_cost(300); // XXX
11936 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11937 "jne,s normal\n\t"
11938 "xorl rdx, rdx\n\t"
11939 "cmpl $div, -1\n\t"
11940 "je,s done\n"
11941 "normal: cdql\n\t"
11942 "idivl $div\n"
11943 "done:" %}
11944 ins_encode(cdql_enc(div));
11945 ins_pipe(ialu_reg_reg_alu0);
11946 %}
11947
11948 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11949 rFlagsReg cr)
11950 %{
11951 match(Set rdx (ModL rax div));
11952 effect(KILL rax, KILL cr);
11953
11954 ins_cost(300); // XXX
11955 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11956 "cmpq rax, rdx\n\t"
11957 "jne,s normal\n\t"
11958 "xorl rdx, rdx\n\t"
11959 "cmpq $div, -1\n\t"
11960 "je,s done\n"
11961 "normal: cdqq\n\t"
11962 "idivq $div\n"
11963 "done:" %}
11964 ins_encode(cdqq_enc(div));
11965 ins_pipe(ialu_reg_reg_alu0);
11966 %}
11967
11968 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11969 %{
11970 match(Set rdx (UModI rax div));
11971 effect(KILL rax, KILL cr);
11972
11973 ins_cost(300);
11974 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11975 ins_encode %{
11976 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11977 %}
11978 ins_pipe(ialu_reg_reg_alu0);
11979 %}
11980
11981 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11982 %{
11983 match(Set rdx (UModL rax div));
11984 effect(KILL rax, KILL cr);
11985
11986 ins_cost(300);
11987 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11988 ins_encode %{
11989 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11990 %}
11991 ins_pipe(ialu_reg_reg_alu0);
11992 %}
11993
11994 // Integer Shift Instructions
11995 // Shift Left by one, two, three
11996 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11997 %{
11998 predicate(!UseAPX);
11999 match(Set dst (LShiftI dst shift));
12000 effect(KILL cr);
12001
12002 format %{ "sall $dst, $shift" %}
12003 ins_encode %{
12004 __ sall($dst$$Register, $shift$$constant);
12005 %}
12006 ins_pipe(ialu_reg);
12007 %}
12008
12009 // Shift Left by one, two, three
12010 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
12011 %{
12012 predicate(UseAPX);
12013 match(Set dst (LShiftI src shift));
12014 effect(KILL cr);
12015 flag(PD::Flag_ndd_demotable_opr1);
12016
12017 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12018 ins_encode %{
12019 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12020 %}
12021 ins_pipe(ialu_reg);
12022 %}
12023
12024 // Shift Left by 8-bit immediate
12025 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12026 %{
12027 predicate(!UseAPX);
12028 match(Set dst (LShiftI dst shift));
12029 effect(KILL cr);
12030
12031 format %{ "sall $dst, $shift" %}
12032 ins_encode %{
12033 __ sall($dst$$Register, $shift$$constant);
12034 %}
12035 ins_pipe(ialu_reg);
12036 %}
12037
12038 // Shift Left by 8-bit immediate
12039 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12040 %{
12041 predicate(UseAPX);
12042 match(Set dst (LShiftI src shift));
12043 effect(KILL cr);
12044 flag(PD::Flag_ndd_demotable_opr1);
12045
12046 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12047 ins_encode %{
12048 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12049 %}
12050 ins_pipe(ialu_reg);
12051 %}
12052
12053 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12054 %{
12055 predicate(UseAPX);
12056 match(Set dst (LShiftI (LoadI src) shift));
12057 effect(KILL cr);
12058
12059 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12060 ins_encode %{
12061 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
12062 %}
12063 ins_pipe(ialu_reg);
12064 %}
12065
12066 // Shift Left by 8-bit immediate
12067 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12068 %{
12069 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12070 effect(KILL cr);
12071
12072 format %{ "sall $dst, $shift" %}
12073 ins_encode %{
12074 __ sall($dst$$Address, $shift$$constant);
12075 %}
12076 ins_pipe(ialu_mem_imm);
12077 %}
12078
12079 // Shift Left by variable
12080 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12081 %{
12082 predicate(!VM_Version::supports_bmi2());
12083 match(Set dst (LShiftI dst shift));
12084 effect(KILL cr);
12085
12086 format %{ "sall $dst, $shift" %}
12087 ins_encode %{
12088 __ sall($dst$$Register);
12089 %}
12090 ins_pipe(ialu_reg_reg);
12091 %}
12092
12093 // Shift Left by variable
12094 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12095 %{
12096 predicate(!VM_Version::supports_bmi2());
12097 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12098 effect(KILL cr);
12099
12100 format %{ "sall $dst, $shift" %}
12101 ins_encode %{
12102 __ sall($dst$$Address);
12103 %}
12104 ins_pipe(ialu_mem_reg);
12105 %}
12106
12107 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12108 %{
12109 predicate(VM_Version::supports_bmi2());
12110 match(Set dst (LShiftI src shift));
12111
12112 format %{ "shlxl $dst, $src, $shift" %}
12113 ins_encode %{
12114 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12115 %}
12116 ins_pipe(ialu_reg_reg);
12117 %}
12118
12119 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12120 %{
12121 predicate(VM_Version::supports_bmi2());
12122 match(Set dst (LShiftI (LoadI src) shift));
12123 ins_cost(175);
12124 format %{ "shlxl $dst, $src, $shift" %}
12125 ins_encode %{
12126 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12127 %}
12128 ins_pipe(ialu_reg_mem);
12129 %}
12130
12131 // Arithmetic Shift Right by 8-bit immediate
12132 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12133 %{
12134 predicate(!UseAPX);
12135 match(Set dst (RShiftI dst shift));
12136 effect(KILL cr);
12137
12138 format %{ "sarl $dst, $shift" %}
12139 ins_encode %{
12140 __ sarl($dst$$Register, $shift$$constant);
12141 %}
12142 ins_pipe(ialu_mem_imm);
12143 %}
12144
12145 // Arithmetic Shift Right by 8-bit immediate
12146 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12147 %{
12148 predicate(UseAPX);
12149 match(Set dst (RShiftI src shift));
12150 effect(KILL cr);
12151 flag(PD::Flag_ndd_demotable_opr1);
12152
12153 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12154 ins_encode %{
12155 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12156 %}
12157 ins_pipe(ialu_mem_imm);
12158 %}
12159
12160 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12161 %{
12162 predicate(UseAPX);
12163 match(Set dst (RShiftI (LoadI src) shift));
12164 effect(KILL cr);
12165
12166 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12167 ins_encode %{
12168 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12169 %}
12170 ins_pipe(ialu_mem_imm);
12171 %}
12172
12173 // Arithmetic Shift Right by 8-bit immediate
12174 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12175 %{
12176 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12177 effect(KILL cr);
12178
12179 format %{ "sarl $dst, $shift" %}
12180 ins_encode %{
12181 __ sarl($dst$$Address, $shift$$constant);
12182 %}
12183 ins_pipe(ialu_mem_imm);
12184 %}
12185
12186 // Arithmetic Shift Right by variable
12187 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12188 %{
12189 predicate(!VM_Version::supports_bmi2());
12190 match(Set dst (RShiftI dst shift));
12191 effect(KILL cr);
12192
12193 format %{ "sarl $dst, $shift" %}
12194 ins_encode %{
12195 __ sarl($dst$$Register);
12196 %}
12197 ins_pipe(ialu_reg_reg);
12198 %}
12199
12200 // Arithmetic Shift Right by variable
12201 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12202 %{
12203 predicate(!VM_Version::supports_bmi2());
12204 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12205 effect(KILL cr);
12206
12207 format %{ "sarl $dst, $shift" %}
12208 ins_encode %{
12209 __ sarl($dst$$Address);
12210 %}
12211 ins_pipe(ialu_mem_reg);
12212 %}
12213
12214 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12215 %{
12216 predicate(VM_Version::supports_bmi2());
12217 match(Set dst (RShiftI src shift));
12218
12219 format %{ "sarxl $dst, $src, $shift" %}
12220 ins_encode %{
12221 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12222 %}
12223 ins_pipe(ialu_reg_reg);
12224 %}
12225
12226 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12227 %{
12228 predicate(VM_Version::supports_bmi2());
12229 match(Set dst (RShiftI (LoadI src) shift));
12230 ins_cost(175);
12231 format %{ "sarxl $dst, $src, $shift" %}
12232 ins_encode %{
12233 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12234 %}
12235 ins_pipe(ialu_reg_mem);
12236 %}
12237
12238 // Logical Shift Right by 8-bit immediate
12239 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12240 %{
12241 predicate(!UseAPX);
12242 match(Set dst (URShiftI dst shift));
12243 effect(KILL cr);
12244
12245 format %{ "shrl $dst, $shift" %}
12246 ins_encode %{
12247 __ shrl($dst$$Register, $shift$$constant);
12248 %}
12249 ins_pipe(ialu_reg);
12250 %}
12251
12252 // Logical Shift Right by 8-bit immediate
12253 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12254 %{
12255 predicate(UseAPX);
12256 match(Set dst (URShiftI src shift));
12257 effect(KILL cr);
12258 flag(PD::Flag_ndd_demotable_opr1);
12259
12260 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12261 ins_encode %{
12262 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12263 %}
12264 ins_pipe(ialu_reg);
12265 %}
12266
12267 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12268 %{
12269 predicate(UseAPX);
12270 match(Set dst (URShiftI (LoadI src) shift));
12271 effect(KILL cr);
12272
12273 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12274 ins_encode %{
12275 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12276 %}
12277 ins_pipe(ialu_reg);
12278 %}
12279
12280 // Logical Shift Right by 8-bit immediate
12281 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12282 %{
12283 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12284 effect(KILL cr);
12285
12286 format %{ "shrl $dst, $shift" %}
12287 ins_encode %{
12288 __ shrl($dst$$Address, $shift$$constant);
12289 %}
12290 ins_pipe(ialu_mem_imm);
12291 %}
12292
12293 // Logical Shift Right by variable
12294 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12295 %{
12296 predicate(!VM_Version::supports_bmi2());
12297 match(Set dst (URShiftI dst shift));
12298 effect(KILL cr);
12299
12300 format %{ "shrl $dst, $shift" %}
12301 ins_encode %{
12302 __ shrl($dst$$Register);
12303 %}
12304 ins_pipe(ialu_reg_reg);
12305 %}
12306
12307 // Logical Shift Right by variable
12308 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12309 %{
12310 predicate(!VM_Version::supports_bmi2());
12311 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12312 effect(KILL cr);
12313
12314 format %{ "shrl $dst, $shift" %}
12315 ins_encode %{
12316 __ shrl($dst$$Address);
12317 %}
12318 ins_pipe(ialu_mem_reg);
12319 %}
12320
12321 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12322 %{
12323 predicate(VM_Version::supports_bmi2());
12324 match(Set dst (URShiftI src shift));
12325
12326 format %{ "shrxl $dst, $src, $shift" %}
12327 ins_encode %{
12328 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12329 %}
12330 ins_pipe(ialu_reg_reg);
12331 %}
12332
12333 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12334 %{
12335 predicate(VM_Version::supports_bmi2());
12336 match(Set dst (URShiftI (LoadI src) shift));
12337 ins_cost(175);
12338 format %{ "shrxl $dst, $src, $shift" %}
12339 ins_encode %{
12340 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12341 %}
12342 ins_pipe(ialu_reg_mem);
12343 %}
12344
12345 // Long Shift Instructions
12346 // Shift Left by one, two, three
12347 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12348 %{
12349 predicate(!UseAPX);
12350 match(Set dst (LShiftL dst shift));
12351 effect(KILL cr);
12352
12353 format %{ "salq $dst, $shift" %}
12354 ins_encode %{
12355 __ salq($dst$$Register, $shift$$constant);
12356 %}
12357 ins_pipe(ialu_reg);
12358 %}
12359
12360 // Shift Left by one, two, three
12361 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12362 %{
12363 predicate(UseAPX);
12364 match(Set dst (LShiftL src shift));
12365 effect(KILL cr);
12366 flag(PD::Flag_ndd_demotable_opr1);
12367
12368 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12369 ins_encode %{
12370 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12371 %}
12372 ins_pipe(ialu_reg);
12373 %}
12374
12375 // Shift Left by 8-bit immediate
12376 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12377 %{
12378 predicate(!UseAPX);
12379 match(Set dst (LShiftL dst shift));
12380 effect(KILL cr);
12381
12382 format %{ "salq $dst, $shift" %}
12383 ins_encode %{
12384 __ salq($dst$$Register, $shift$$constant);
12385 %}
12386 ins_pipe(ialu_reg);
12387 %}
12388
12389 // Shift Left by 8-bit immediate
12390 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12391 %{
12392 predicate(UseAPX);
12393 match(Set dst (LShiftL src shift));
12394 effect(KILL cr);
12395 flag(PD::Flag_ndd_demotable_opr1);
12396
12397 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12398 ins_encode %{
12399 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12400 %}
12401 ins_pipe(ialu_reg);
12402 %}
12403
12404 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12405 %{
12406 predicate(UseAPX);
12407 match(Set dst (LShiftL (LoadL src) shift));
12408 effect(KILL cr);
12409
12410 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12411 ins_encode %{
12412 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12413 %}
12414 ins_pipe(ialu_reg);
12415 %}
12416
12417 // Shift Left by 8-bit immediate
12418 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12419 %{
12420 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12421 effect(KILL cr);
12422
12423 format %{ "salq $dst, $shift" %}
12424 ins_encode %{
12425 __ salq($dst$$Address, $shift$$constant);
12426 %}
12427 ins_pipe(ialu_mem_imm);
12428 %}
12429
12430 // Shift Left by variable
12431 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12432 %{
12433 predicate(!VM_Version::supports_bmi2());
12434 match(Set dst (LShiftL dst shift));
12435 effect(KILL cr);
12436
12437 format %{ "salq $dst, $shift" %}
12438 ins_encode %{
12439 __ salq($dst$$Register);
12440 %}
12441 ins_pipe(ialu_reg_reg);
12442 %}
12443
12444 // Shift Left by variable
12445 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12446 %{
12447 predicate(!VM_Version::supports_bmi2());
12448 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12449 effect(KILL cr);
12450
12451 format %{ "salq $dst, $shift" %}
12452 ins_encode %{
12453 __ salq($dst$$Address);
12454 %}
12455 ins_pipe(ialu_mem_reg);
12456 %}
12457
12458 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12459 %{
12460 predicate(VM_Version::supports_bmi2());
12461 match(Set dst (LShiftL src shift));
12462
12463 format %{ "shlxq $dst, $src, $shift" %}
12464 ins_encode %{
12465 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12466 %}
12467 ins_pipe(ialu_reg_reg);
12468 %}
12469
12470 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12471 %{
12472 predicate(VM_Version::supports_bmi2());
12473 match(Set dst (LShiftL (LoadL src) shift));
12474 ins_cost(175);
12475 format %{ "shlxq $dst, $src, $shift" %}
12476 ins_encode %{
12477 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12478 %}
12479 ins_pipe(ialu_reg_mem);
12480 %}
12481
12482 // Arithmetic Shift Right by 8-bit immediate
12483 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12484 %{
12485 predicate(!UseAPX);
12486 match(Set dst (RShiftL dst shift));
12487 effect(KILL cr);
12488
12489 format %{ "sarq $dst, $shift" %}
12490 ins_encode %{
12491 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12492 %}
12493 ins_pipe(ialu_mem_imm);
12494 %}
12495
12496 // Arithmetic Shift Right by 8-bit immediate
12497 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12498 %{
12499 predicate(UseAPX);
12500 match(Set dst (RShiftL src shift));
12501 effect(KILL cr);
12502 flag(PD::Flag_ndd_demotable_opr1);
12503
12504 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12505 ins_encode %{
12506 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12507 %}
12508 ins_pipe(ialu_mem_imm);
12509 %}
12510
12511 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12512 %{
12513 predicate(UseAPX);
12514 match(Set dst (RShiftL (LoadL src) shift));
12515 effect(KILL cr);
12516
12517 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12518 ins_encode %{
12519 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12520 %}
12521 ins_pipe(ialu_mem_imm);
12522 %}
12523
12524 // Arithmetic Shift Right by 8-bit immediate
12525 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12526 %{
12527 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12528 effect(KILL cr);
12529
12530 format %{ "sarq $dst, $shift" %}
12531 ins_encode %{
12532 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12533 %}
12534 ins_pipe(ialu_mem_imm);
12535 %}
12536
12537 // Arithmetic Shift Right by variable
12538 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12539 %{
12540 predicate(!VM_Version::supports_bmi2());
12541 match(Set dst (RShiftL dst shift));
12542 effect(KILL cr);
12543
12544 format %{ "sarq $dst, $shift" %}
12545 ins_encode %{
12546 __ sarq($dst$$Register);
12547 %}
12548 ins_pipe(ialu_reg_reg);
12549 %}
12550
12551 // Arithmetic Shift Right by variable
12552 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12553 %{
12554 predicate(!VM_Version::supports_bmi2());
12555 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12556 effect(KILL cr);
12557
12558 format %{ "sarq $dst, $shift" %}
12559 ins_encode %{
12560 __ sarq($dst$$Address);
12561 %}
12562 ins_pipe(ialu_mem_reg);
12563 %}
12564
12565 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12566 %{
12567 predicate(VM_Version::supports_bmi2());
12568 match(Set dst (RShiftL src shift));
12569
12570 format %{ "sarxq $dst, $src, $shift" %}
12571 ins_encode %{
12572 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12573 %}
12574 ins_pipe(ialu_reg_reg);
12575 %}
12576
12577 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12578 %{
12579 predicate(VM_Version::supports_bmi2());
12580 match(Set dst (RShiftL (LoadL src) shift));
12581 ins_cost(175);
12582 format %{ "sarxq $dst, $src, $shift" %}
12583 ins_encode %{
12584 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12585 %}
12586 ins_pipe(ialu_reg_mem);
12587 %}
12588
12589 // Logical Shift Right by 8-bit immediate
12590 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12591 %{
12592 predicate(!UseAPX);
12593 match(Set dst (URShiftL dst shift));
12594 effect(KILL cr);
12595
12596 format %{ "shrq $dst, $shift" %}
12597 ins_encode %{
12598 __ shrq($dst$$Register, $shift$$constant);
12599 %}
12600 ins_pipe(ialu_reg);
12601 %}
12602
12603 // Logical Shift Right by 8-bit immediate
12604 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12605 %{
12606 predicate(UseAPX);
12607 match(Set dst (URShiftL src shift));
12608 effect(KILL cr);
12609 flag(PD::Flag_ndd_demotable_opr1);
12610
12611 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12612 ins_encode %{
12613 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12614 %}
12615 ins_pipe(ialu_reg);
12616 %}
12617
12618 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12619 %{
12620 predicate(UseAPX);
12621 match(Set dst (URShiftL (LoadL src) shift));
12622 effect(KILL cr);
12623
12624 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12625 ins_encode %{
12626 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12627 %}
12628 ins_pipe(ialu_reg);
12629 %}
12630
12631 // Logical Shift Right by 8-bit immediate
12632 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12633 %{
12634 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12635 effect(KILL cr);
12636
12637 format %{ "shrq $dst, $shift" %}
12638 ins_encode %{
12639 __ shrq($dst$$Address, $shift$$constant);
12640 %}
12641 ins_pipe(ialu_mem_imm);
12642 %}
12643
12644 // Logical Shift Right by variable
12645 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12646 %{
12647 predicate(!VM_Version::supports_bmi2());
12648 match(Set dst (URShiftL dst shift));
12649 effect(KILL cr);
12650
12651 format %{ "shrq $dst, $shift" %}
12652 ins_encode %{
12653 __ shrq($dst$$Register);
12654 %}
12655 ins_pipe(ialu_reg_reg);
12656 %}
12657
12658 // Logical Shift Right by variable
12659 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12660 %{
12661 predicate(!VM_Version::supports_bmi2());
12662 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12663 effect(KILL cr);
12664
12665 format %{ "shrq $dst, $shift" %}
12666 ins_encode %{
12667 __ shrq($dst$$Address);
12668 %}
12669 ins_pipe(ialu_mem_reg);
12670 %}
12671
12672 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12673 %{
12674 predicate(VM_Version::supports_bmi2());
12675 match(Set dst (URShiftL src shift));
12676
12677 format %{ "shrxq $dst, $src, $shift" %}
12678 ins_encode %{
12679 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12680 %}
12681 ins_pipe(ialu_reg_reg);
12682 %}
12683
12684 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12685 %{
12686 predicate(VM_Version::supports_bmi2());
12687 match(Set dst (URShiftL (LoadL src) shift));
12688 ins_cost(175);
12689 format %{ "shrxq $dst, $src, $shift" %}
12690 ins_encode %{
12691 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12692 %}
12693 ins_pipe(ialu_reg_mem);
12694 %}
12695
12696 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12697 // This idiom is used by the compiler for the i2b bytecode.
12698 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12699 %{
12700 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12701
12702 format %{ "movsbl $dst, $src\t# i2b" %}
12703 ins_encode %{
12704 __ movsbl($dst$$Register, $src$$Register);
12705 %}
12706 ins_pipe(ialu_reg_reg);
12707 %}
12708
12709 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12710 // This idiom is used by the compiler for the i2s bytecode.
12711 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12712 %{
12713 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12714
12715 format %{ "movswl $dst, $src\t# i2s" %}
12716 ins_encode %{
12717 __ movswl($dst$$Register, $src$$Register);
12718 %}
12719 ins_pipe(ialu_reg_reg);
12720 %}
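// A hedged sketch of the Java-level idioms behind the two rules above (method
// names are illustrative only): narrowing casts are what produce the
// shift-left-then-arithmetic-shift-right shapes collapsed into movsbl/movswl.
//
//   static int toByteValue (int x) { return (byte)  x; }   // i2b: (x << 24) >> 24 -> movsbl
//   static int toShortValue(int x) { return (short) x; }   // i2s: (x << 16) >> 16 -> movswl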
12721
12722 // ROL/ROR instructions
12723
12724 // Rotate left by constant.
12725 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12726 %{
12727 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12728 match(Set dst (RotateLeft dst shift));
12729 effect(KILL cr);
12730 format %{ "roll $dst, $shift" %}
12731 ins_encode %{
12732 __ roll($dst$$Register, $shift$$constant);
12733 %}
12734 ins_pipe(ialu_reg);
12735 %}
12736
12737 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12738 %{
12739 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12740 match(Set dst (RotateLeft src shift));
12741 format %{ "rolxl $dst, $src, $shift" %}
12742 ins_encode %{
12743 int shift = 32 - ($shift$$constant & 31);
12744 __ rorxl($dst$$Register, $src$$Register, shift);
12745 %}
12746 ins_pipe(ialu_reg_reg);
12747 %}
12748
12749 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12750 %{
12751 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12752 match(Set dst (RotateLeft (LoadI src) shift));
12753 ins_cost(175);
12754 format %{ "rolxl $dst, $src, $shift" %}
12755 ins_encode %{
12756 int shift = 32 - ($shift$$constant & 31);
12757 __ rorxl($dst$$Register, $src$$Address, shift);
12758 %}
12759 ins_pipe(ialu_reg_mem);
12760 %}
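// The rewrite used by the two rules above, stated as a hedged Java identity
// (the long rules further down rely on the analogous 64 - c form): a rotate
// left by c equals a rotate right by (32 - c) mod 32, which lets RORX stand in
// for the non-existent immediate-form rotate-left.
//
//   static boolean rotlViaRotr(int x, int c) {
//     return Integer.rotateLeft(x, c) == Integer.rotateRight(x, (32 - c) & 31);
//   }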
12761
12762 // Rotate Left by variable
12763 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12764 %{
12765 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12766 match(Set dst (RotateLeft dst shift));
12767 effect(KILL cr);
12768 format %{ "roll $dst, $shift" %}
12769 ins_encode %{
12770 __ roll($dst$$Register);
12771 %}
12772 ins_pipe(ialu_reg_reg);
12773 %}
12774
12775 // Rotate Left by variable
12776 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12777 %{
12778 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12779 match(Set dst (RotateLeft src shift));
12780 effect(KILL cr);
12781 flag(PD::Flag_ndd_demotable_opr1);
12782
12783 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12784 ins_encode %{
12785 __ eroll($dst$$Register, $src$$Register, false);
12786 %}
12787 ins_pipe(ialu_reg_reg);
12788 %}
12789
12790 // Rotate Right by constant.
12791 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12792 %{
12793 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12794 match(Set dst (RotateRight dst shift));
12795 effect(KILL cr);
12796 format %{ "rorl $dst, $shift" %}
12797 ins_encode %{
12798 __ rorl($dst$$Register, $shift$$constant);
12799 %}
12800 ins_pipe(ialu_reg);
12801 %}
12802
12803 // Rotate Right by constant.
12804 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12805 %{
12806 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12807 match(Set dst (RotateRight src shift));
12808 format %{ "rorxl $dst, $src, $shift" %}
12809 ins_encode %{
12810 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12811 %}
12812 ins_pipe(ialu_reg_reg);
12813 %}
12814
12815 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12816 %{
12817 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12818 match(Set dst (RotateRight (LoadI src) shift));
12819 ins_cost(175);
12820 format %{ "rorxl $dst, $src, $shift" %}
12821 ins_encode %{
12822 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12823 %}
12824 ins_pipe(ialu_reg_mem);
12825 %}
12826
12827 // Rotate Right by variable
12828 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12829 %{
12830 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12831 match(Set dst (RotateRight dst shift));
12832 effect(KILL cr);
12833 format %{ "rorl $dst, $shift" %}
12834 ins_encode %{
12835 __ rorl($dst$$Register);
12836 %}
12837 ins_pipe(ialu_reg_reg);
12838 %}
12839
12840 // Rotate Right by variable
12841 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12842 %{
12843 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12844 match(Set dst (RotateRight src shift));
12845 effect(KILL cr);
12846 flag(PD::Flag_ndd_demotable_opr1);
12847
12848 format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
12849 ins_encode %{
12850 __ erorl($dst$$Register, $src$$Register, false);
12851 %}
12852 ins_pipe(ialu_reg_reg);
12853 %}
12854
12855 // Rotate Left by constant.
12856 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12857 %{
12858 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12859 match(Set dst (RotateLeft dst shift));
12860 effect(KILL cr);
12861 format %{ "rolq $dst, $shift" %}
12862 ins_encode %{
12863 __ rolq($dst$$Register, $shift$$constant);
12864 %}
12865 ins_pipe(ialu_reg);
12866 %}
12867
12868 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12869 %{
12870 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12871 match(Set dst (RotateLeft src shift));
12872 format %{ "rolxq $dst, $src, $shift" %}
12873 ins_encode %{
12874 int shift = 64 - ($shift$$constant & 63);
12875 __ rorxq($dst$$Register, $src$$Register, shift);
12876 %}
12877 ins_pipe(ialu_reg_reg);
12878 %}
12879
12880 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12881 %{
12882 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12883 match(Set dst (RotateLeft (LoadL src) shift));
12884 ins_cost(175);
12885 format %{ "rolxq $dst, $src, $shift" %}
12886 ins_encode %{
12887 int shift = 64 - ($shift$$constant & 63);
12888 __ rorxq($dst$$Register, $src$$Address, shift);
12889 %}
12890 ins_pipe(ialu_reg_mem);
12891 %}
12892
12893 // Rotate Left by variable
12894 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12895 %{
12896 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12897 match(Set dst (RotateLeft dst shift));
12898 effect(KILL cr);
12899
12900 format %{ "rolq $dst, $shift" %}
12901 ins_encode %{
12902 __ rolq($dst$$Register);
12903 %}
12904 ins_pipe(ialu_reg_reg);
12905 %}
12906
12907 // Rotate Left by variable
12908 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12909 %{
12910 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12911 match(Set dst (RotateLeft src shift));
12912 effect(KILL cr);
12913 flag(PD::Flag_ndd_demotable_opr1);
12914
12915 format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
12916 ins_encode %{
12917 __ erolq($dst$$Register, $src$$Register, false);
12918 %}
12919 ins_pipe(ialu_reg_reg);
12920 %}
12921
12922 // Rotate Right by constant.
12923 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12924 %{
12925 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12926 match(Set dst (RotateRight dst shift));
12927 effect(KILL cr);
12928 format %{ "rorq $dst, $shift" %}
12929 ins_encode %{
12930 __ rorq($dst$$Register, $shift$$constant);
12931 %}
12932 ins_pipe(ialu_reg);
12933 %}
12934
12935 // Rotate Right by constant
12936 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12937 %{
12938 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12939 match(Set dst (RotateRight src shift));
12940 format %{ "rorxq $dst, $src, $shift" %}
12941 ins_encode %{
12942 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12943 %}
12944 ins_pipe(ialu_reg_reg);
12945 %}
12946
12947 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12948 %{
12949 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12950 match(Set dst (RotateRight (LoadL src) shift));
12951 ins_cost(175);
12952 format %{ "rorxq $dst, $src, $shift" %}
12953 ins_encode %{
12954 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12955 %}
12956 ins_pipe(ialu_reg_mem);
12957 %}
12958
12959 // Rotate Right by variable
12960 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12961 %{
12962 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12963 match(Set dst (RotateRight dst shift));
12964 effect(KILL cr);
12965 format %{ "rorq $dst, $shift" %}
12966 ins_encode %{
12967 __ rorq($dst$$Register);
12968 %}
12969 ins_pipe(ialu_reg_reg);
12970 %}
12971
12972 // Rotate Right by variable
12973 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12974 %{
12975 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12976 match(Set dst (RotateRight src shift));
12977 effect(KILL cr);
12978 flag(PD::Flag_ndd_demotable_opr1);
12979
12980 format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
12981 ins_encode %{
12982 __ erorq($dst$$Register, $src$$Register, false);
12983 %}
12984 ins_pipe(ialu_reg_reg);
12985 %}
12986
12987 //----------------------------- CompressBits/ExpandBits ------------------------
12988
12989 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12990 predicate(n->bottom_type()->isa_long());
12991 match(Set dst (CompressBits src mask));
12992 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12993 ins_encode %{
12994 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12995 %}
12996 ins_pipe( pipe_slow );
12997 %}
12998
12999 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
13000 predicate(n->bottom_type()->isa_long());
13001 match(Set dst (ExpandBits src mask));
13002 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
13003 ins_encode %{
13004 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
13005 %}
13006 ins_pipe( pipe_slow );
13007 %}
13008
13009 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
13010 predicate(n->bottom_type()->isa_long());
13011 match(Set dst (CompressBits src (LoadL mask)));
13012 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
13013 ins_encode %{
13014 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
13015 %}
13016 ins_pipe( pipe_slow );
13017 %}
13018
13019 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
13020 predicate(n->bottom_type()->isa_long());
13021 match(Set dst (ExpandBits src (LoadL mask)));
13022 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
13023 ins_encode %{
13024 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
13025 %}
13026 ins_pipe( pipe_slow );
13027 %}
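// Illustrative only: the Java-level operations (JDK 19+) that C2 models as
// CompressBits/ExpandBits and matches to pext/pdep above; value and mask are
// arbitrary example inputs.
//
//   long mask   = 0x00FF00FF00FF00FFL;
//   long packed = Long.compress(value, mask);   // gather the masked bits to the low end (pext)
//   long spread = Long.expand(packed, mask);    // scatter them back under the mask (pdep)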
13028
13029
13030 // Logical Instructions
13031
13032 // Integer Logical Instructions
13033
13034 // And Instructions
13035 // And Register with Register
13036 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13037 %{
13038 predicate(!UseAPX);
13039 match(Set dst (AndI dst src));
13040 effect(KILL cr);
13041 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13042
13043 format %{ "andl $dst, $src\t# int" %}
13044 ins_encode %{
13045 __ andl($dst$$Register, $src$$Register);
13046 %}
13047 ins_pipe(ialu_reg_reg);
13048 %}
13049
13050 // And Register with Register using New Data Destination (NDD)
13051 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13052 %{
13053 predicate(UseAPX);
13054 match(Set dst (AndI src1 src2));
13055 effect(KILL cr);
13056 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13057
13058 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13059 ins_encode %{
13060 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
13061
13062 %}
13063 ins_pipe(ialu_reg_reg);
13064 %}
13065
13066 // And Register with Immediate 255
13067 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
13068 %{
13069 match(Set dst (AndI src mask));
13070
13071 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
13072 ins_encode %{
13073 __ movzbl($dst$$Register, $src$$Register);
13074 %}
13075 ins_pipe(ialu_reg);
13076 %}
13077
13078 // And Register with Immediate 255 and promote to long
13079 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13080 %{
13081 match(Set dst (ConvI2L (AndI src mask)));
13082
13083 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
13084 ins_encode %{
13085 __ movzbl($dst$$Register, $src$$Register);
13086 %}
13087 ins_pipe(ialu_reg);
13088 %}
13089
13090 // And Register with Immediate 65535
13091 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13092 %{
13093 match(Set dst (AndI src mask));
13094
13095 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13096 ins_encode %{
13097 __ movzwl($dst$$Register, $src$$Register);
13098 %}
13099 ins_pipe(ialu_reg);
13100 %}
13101
13102 // And Register with Immediate 65535 and promote to long
13103 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13104 %{
13105 match(Set dst (ConvI2L (AndI src mask)));
13106
13107 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13108 ins_encode %{
13109 __ movzwl($dst$$Register, $src$$Register);
13110 %}
13111 ins_pipe(ialu_reg);
13112 %}
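// A hedged sketch of the masking shapes the four rules above recognize (method
// names are illustrative): AND with 0xFF or 0xFFFF is selected as a
// zero-extending move, which also leaves the flags untouched.
//
//   static int  lowByte       (int x) { return x & 0xFF;           }   // movzbl
//   static long lowByteToLong (int x) { return (long)(x & 0xFF);   }   // movzbl, ConvI2L folded away
//   static long lowShortToLong(int x) { return (long)(x & 0xFFFF); }   // movzwl, ConvI2L folded away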
13113
13114 // Can skip int2long conversions after AND with small bitmask
13115 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13116 %{
13117 predicate(VM_Version::supports_bmi2());
13118 ins_cost(125);
13119 effect(TEMP tmp, KILL cr);
13120 match(Set dst (ConvI2L (AndI src mask)));
13121 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13122 ins_encode %{
13123 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13124 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13125 %}
13126 ins_pipe(ialu_reg_reg);
13127 %}
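// Worked example for the BZHI rule above, with an assumed mask value: for an
// immI_Pow2M1 constant mask = 2^k - 1, exact_log2(mask + 1) is k, and bzhiq
// keeps bits 0..k-1 of the source while clearing everything above them, so the
// int-to-long widening needs no extra instruction.
//
//   static long maskedToLong(int x) {
//     int mask = 0x7FF;                                   // 2^11 - 1
//     int k    = Integer.numberOfTrailingZeros(mask + 1); // exact_log2(mask + 1) == 11
//     return (long)(x & mask);                            // bzhiq dst, x, k
//   }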
13128
13129 // And Register with Immediate
13130 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13131 %{
13132 predicate(!UseAPX);
13133 match(Set dst (AndI dst src));
13134 effect(KILL cr);
13135 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13136
13137 format %{ "andl $dst, $src\t# int" %}
13138 ins_encode %{
13139 __ andl($dst$$Register, $src$$constant);
13140 %}
13141 ins_pipe(ialu_reg);
13142 %}
13143
13144 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13145 %{
13146 predicate(UseAPX);
13147 match(Set dst (AndI src1 src2));
13148 effect(KILL cr);
13149 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13150
13151 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13152 ins_encode %{
13153 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13154 %}
13155 ins_pipe(ialu_reg);
13156 %}
13157
13158 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13159 %{
13160 predicate(UseAPX);
13161 match(Set dst (AndI (LoadI src1) src2));
13162 effect(KILL cr);
13163 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13164
13165 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13166 ins_encode %{
13167 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13168 %}
13169 ins_pipe(ialu_reg);
13170 %}
13171
13172 // And Register with Memory
13173 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13174 %{
13175 predicate(!UseAPX);
13176 match(Set dst (AndI dst (LoadI src)));
13177 effect(KILL cr);
13178 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13179
13180 ins_cost(150);
13181 format %{ "andl $dst, $src\t# int" %}
13182 ins_encode %{
13183 __ andl($dst$$Register, $src$$Address);
13184 %}
13185 ins_pipe(ialu_reg_mem);
13186 %}
13187
13188 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13189 %{
13190 predicate(UseAPX);
13191 match(Set dst (AndI src1 (LoadI src2)));
13192 effect(KILL cr);
13193 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13194
13195 ins_cost(150);
13196 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13197 ins_encode %{
13198 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13199 %}
13200 ins_pipe(ialu_reg_mem);
13201 %}
13202
13203 // And Memory with Register
13204 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13205 %{
13206 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13207 effect(KILL cr);
13208 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13209
13210 ins_cost(150);
13211 format %{ "andb $dst, $src\t# byte" %}
13212 ins_encode %{
13213 __ andb($dst$$Address, $src$$Register);
13214 %}
13215 ins_pipe(ialu_mem_reg);
13216 %}
13217
13218 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13219 %{
13220 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13221 effect(KILL cr);
13222 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13223
13224 ins_cost(150);
13225 format %{ "andl $dst, $src\t# int" %}
13226 ins_encode %{
13227 __ andl($dst$$Address, $src$$Register);
13228 %}
13229 ins_pipe(ialu_mem_reg);
13230 %}
13231
13232 // And Memory with Immediate
13233 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13234 %{
13235 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13236 effect(KILL cr);
13237 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13238
13239 ins_cost(125);
13240 format %{ "andl $dst, $src\t# int" %}
13241 ins_encode %{
13242 __ andl($dst$$Address, $src$$constant);
13243 %}
13244 ins_pipe(ialu_mem_imm);
13245 %}
13246
13247 // BMI1 instructions
13248 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13249 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13250 predicate(UseBMI1Instructions);
13251 effect(KILL cr);
13252 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13253
13254 ins_cost(125);
13255 format %{ "andnl $dst, $src1, $src2" %}
13256
13257 ins_encode %{
13258 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13259 %}
13260 ins_pipe(ialu_reg_mem);
13261 %}
13262
13263 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13264 match(Set dst (AndI (XorI src1 minus_1) src2));
13265 predicate(UseBMI1Instructions);
13266 effect(KILL cr);
13267 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13268
13269 format %{ "andnl $dst, $src1, $src2" %}
13270
13271 ins_encode %{
13272 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13273 %}
13274 ins_pipe(ialu_reg);
13275 %}
13276
13277 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13278 match(Set dst (AndI (SubI imm_zero src) src));
13279 predicate(UseBMI1Instructions);
13280 effect(KILL cr);
13281 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13282
13283 format %{ "blsil $dst, $src" %}
13284
13285 ins_encode %{
13286 __ blsil($dst$$Register, $src$$Register);
13287 %}
13288 ins_pipe(ialu_reg);
13289 %}
13290
13291 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13292 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13293 predicate(UseBMI1Instructions);
13294 effect(KILL cr);
13295 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13296
13297 ins_cost(125);
13298 format %{ "blsil $dst, $src" %}
13299
13300 ins_encode %{
13301 __ blsil($dst$$Register, $src$$Address);
13302 %}
13303 ins_pipe(ialu_reg_mem);
13304 %}
13305
13306 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13307 %{
13308 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13309 predicate(UseBMI1Instructions);
13310 effect(KILL cr);
13311 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13312
13313 ins_cost(125);
13314 format %{ "blsmskl $dst, $src" %}
13315
13316 ins_encode %{
13317 __ blsmskl($dst$$Register, $src$$Address);
13318 %}
13319 ins_pipe(ialu_reg_mem);
13320 %}
13321
13322 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13323 %{
13324 match(Set dst (XorI (AddI src minus_1) src));
13325 predicate(UseBMI1Instructions);
13326 effect(KILL cr);
13327 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13328
13329 format %{ "blsmskl $dst, $src" %}
13330
13331 ins_encode %{
13332 __ blsmskl($dst$$Register, $src$$Register);
13333 %}
13334
13335 ins_pipe(ialu_reg);
13336 %}
13337
13338 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13339 %{
13340 match(Set dst (AndI (AddI src minus_1) src) );
13341 predicate(UseBMI1Instructions);
13342 effect(KILL cr);
13343 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13344
13345 format %{ "blsrl $dst, $src" %}
13346
13347 ins_encode %{
13348 __ blsrl($dst$$Register, $src$$Register);
13349 %}
13350
13351 ins_pipe(ialu_reg_mem);
13352 %}
13353
13354 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13355 %{
13356 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13357 predicate(UseBMI1Instructions);
13358 effect(KILL cr);
13359 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13360
13361 ins_cost(125);
13362 format %{ "blsrl $dst, $src" %}
13363
13364 ins_encode %{
13365 __ blsrl($dst$$Register, $src$$Address);
13366 %}
13367
13368 ins_pipe(ialu_reg);
13369 %}
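// A hedged summary of the bit-twiddling shapes the BMI1 rules above match
// (method names are illustrative only):
//
//   static int andNot        (int a, int b) { return ~a & b;      }   // andnl
//   static int lowestSetBit  (int x)        { return x & -x;      }   // blsil
//   static int maskUpToLowest(int x)        { return x ^ (x - 1); }   // blsmskl
//   static int clearLowestBit(int x)        { return x & (x - 1); }   // blsrl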
13370
13371 // Or Instructions
13372 // Or Register with Register
13373 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13374 %{
13375 predicate(!UseAPX);
13376 match(Set dst (OrI dst src));
13377 effect(KILL cr);
13378 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13379
13380 format %{ "orl $dst, $src\t# int" %}
13381 ins_encode %{
13382 __ orl($dst$$Register, $src$$Register);
13383 %}
13384 ins_pipe(ialu_reg_reg);
13385 %}
13386
13387 // Or Register with Register using New Data Destination (NDD)
13388 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13389 %{
13390 predicate(UseAPX);
13391 match(Set dst (OrI src1 src2));
13392 effect(KILL cr);
13393 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13394
13395 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13396 ins_encode %{
13397 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13398 %}
13399 ins_pipe(ialu_reg_reg);
13400 %}
13401
13402 // Or Register with Immediate
13403 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13404 %{
13405 predicate(!UseAPX);
13406 match(Set dst (OrI dst src));
13407 effect(KILL cr);
13408 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13409
13410 format %{ "orl $dst, $src\t# int" %}
13411 ins_encode %{
13412 __ orl($dst$$Register, $src$$constant);
13413 %}
13414 ins_pipe(ialu_reg);
13415 %}
13416
13417 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13418 %{
13419 predicate(UseAPX);
13420 match(Set dst (OrI src1 src2));
13421 effect(KILL cr);
13422 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13423
13424 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13425 ins_encode %{
13426 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13427 %}
13428 ins_pipe(ialu_reg);
13429 %}
13430
13431 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13432 %{
13433 predicate(UseAPX);
13434 match(Set dst (OrI src1 src2));
13435 effect(KILL cr);
13436 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13437
13438 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13439 ins_encode %{
13440 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13441 %}
13442 ins_pipe(ialu_reg);
13443 %}
13444
13445 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13446 %{
13447 predicate(UseAPX);
13448 match(Set dst (OrI (LoadI src1) src2));
13449 effect(KILL cr);
13450 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13451
13452 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13453 ins_encode %{
13454 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13455 %}
13456 ins_pipe(ialu_reg);
13457 %}
13458
13459 // Or Register with Memory
13460 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13461 %{
13462 predicate(!UseAPX);
13463 match(Set dst (OrI dst (LoadI src)));
13464 effect(KILL cr);
13465 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13466
13467 ins_cost(150);
13468 format %{ "orl $dst, $src\t# int" %}
13469 ins_encode %{
13470 __ orl($dst$$Register, $src$$Address);
13471 %}
13472 ins_pipe(ialu_reg_mem);
13473 %}
13474
13475 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13476 %{
13477 predicate(UseAPX);
13478 match(Set dst (OrI src1 (LoadI src2)));
13479 effect(KILL cr);
13480 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13481
13482 ins_cost(150);
13483 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13484 ins_encode %{
13485 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13486 %}
13487 ins_pipe(ialu_reg_mem);
13488 %}
13489
13490 // Or Memory with Register
13491 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13492 %{
13493 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13494 effect(KILL cr);
13495 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13496
13497 ins_cost(150);
13498 format %{ "orb $dst, $src\t# byte" %}
13499 ins_encode %{
13500 __ orb($dst$$Address, $src$$Register);
13501 %}
13502 ins_pipe(ialu_mem_reg);
13503 %}
13504
13505 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13506 %{
13507 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13508 effect(KILL cr);
13509 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13510
13511 ins_cost(150);
13512 format %{ "orl $dst, $src\t# int" %}
13513 ins_encode %{
13514 __ orl($dst$$Address, $src$$Register);
13515 %}
13516 ins_pipe(ialu_mem_reg);
13517 %}
13518
13519 // Or Memory with Immediate
13520 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13521 %{
13522 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13523 effect(KILL cr);
13524 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13525
13526 ins_cost(125);
13527 format %{ "orl $dst, $src\t# int" %}
13528 ins_encode %{
13529 __ orl($dst$$Address, $src$$constant);
13530 %}
13531 ins_pipe(ialu_mem_imm);
13532 %}
13533
13534 // Xor Instructions
13535 // Xor Register with Register
13536 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13537 %{
13538 predicate(!UseAPX);
13539 match(Set dst (XorI dst src));
13540 effect(KILL cr);
13541 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13542
13543 format %{ "xorl $dst, $src\t# int" %}
13544 ins_encode %{
13545 __ xorl($dst$$Register, $src$$Register);
13546 %}
13547 ins_pipe(ialu_reg_reg);
13548 %}
13549
13550 // Xor Register with Register using New Data Destination (NDD)
13551 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13552 %{
13553 predicate(UseAPX);
13554 match(Set dst (XorI src1 src2));
13555 effect(KILL cr);
13556 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13557
13558 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13559 ins_encode %{
13560 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13561 %}
13562 ins_pipe(ialu_reg_reg);
13563 %}
13564
13565 // Xor Register with Immediate -1
13566 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13567 %{
13568 predicate(!UseAPX);
13569 match(Set dst (XorI dst imm));
13570
13571 format %{ "notl $dst" %}
13572 ins_encode %{
13573 __ notl($dst$$Register);
13574 %}
13575 ins_pipe(ialu_reg);
13576 %}
13577
13578 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13579 %{
13580 match(Set dst (XorI src imm));
13581 predicate(UseAPX);
13582 flag(PD::Flag_ndd_demotable_opr1);
13583
13584 format %{ "enotl $dst, $src" %}
13585 ins_encode %{
13586 __ enotl($dst$$Register, $src$$Register);
13587 %}
13588 ins_pipe(ialu_reg);
13589 %}
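// Illustrative only: C2 has no separate bitwise-not node, so Java's ~x is
// modeled as XorI(x, -1); the two rules above turn that into notl / enotl
// rather than an xor with an immediate.
//
//   static int bitNot(int x) { return ~x; }   // XorI(x, -1) -> notl (enotl under APX)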
13590
13591 // Xor Register with Immediate
13592 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13593 %{
13594 // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13595 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13596 match(Set dst (XorI dst src));
13597 effect(KILL cr);
13598 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13599
13600 format %{ "xorl $dst, $src\t# int" %}
13601 ins_encode %{
13602 __ xorl($dst$$Register, $src$$constant);
13603 %}
13604 ins_pipe(ialu_reg);
13605 %}
13606
13607 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13608 %{
13609 // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13610 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13611 match(Set dst (XorI src1 src2));
13612 effect(KILL cr);
13613 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13614
13615 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13616 ins_encode %{
13617 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13618 %}
13619 ins_pipe(ialu_reg);
13620 %}
13621
13622 // Xor Memory with Immediate
13623 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13624 %{
13625 predicate(UseAPX);
13626 match(Set dst (XorI (LoadI src1) src2));
13627 effect(KILL cr);
13628 ins_cost(150);
13629 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13630
13631 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13632 ins_encode %{
13633 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13634 %}
13635 ins_pipe(ialu_reg);
13636 %}
13637
13638 // Xor Register with Memory
13639 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13640 %{
13641 predicate(!UseAPX);
13642 match(Set dst (XorI dst (LoadI src)));
13643 effect(KILL cr);
13644 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13645
13646 ins_cost(150);
13647 format %{ "xorl $dst, $src\t# int" %}
13648 ins_encode %{
13649 __ xorl($dst$$Register, $src$$Address);
13650 %}
13651 ins_pipe(ialu_reg_mem);
13652 %}
13653
13654 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13655 %{
13656 predicate(UseAPX);
13657 match(Set dst (XorI src1 (LoadI src2)));
13658 effect(KILL cr);
13659 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13660
13661 ins_cost(150);
13662 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13663 ins_encode %{
13664 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13665 %}
13666 ins_pipe(ialu_reg_mem);
13667 %}
13668
13669 // Xor Memory with Register
13670 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13671 %{
13672 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13673 effect(KILL cr);
13674 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13675
13676 ins_cost(150);
13677 format %{ "xorb $dst, $src\t# byte" %}
13678 ins_encode %{
13679 __ xorb($dst$$Address, $src$$Register);
13680 %}
13681 ins_pipe(ialu_mem_reg);
13682 %}
13683
13684 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13685 %{
13686 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13687 effect(KILL cr);
13688 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13689
13690 ins_cost(150);
13691 format %{ "xorl $dst, $src\t# int" %}
13692 ins_encode %{
13693 __ xorl($dst$$Address, $src$$Register);
13694 %}
13695 ins_pipe(ialu_mem_reg);
13696 %}
13697
13698 // Xor Memory with Immediate
13699 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13700 %{
13701 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13702 effect(KILL cr);
13703 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13704
13705 ins_cost(125);
13706 format %{ "xorl $dst, $src\t# int" %}
13707 ins_encode %{
13708 __ xorl($dst$$Address, $src$$constant);
13709 %}
13710 ins_pipe(ialu_mem_imm);
13711 %}
13712
13713
13714 // Long Logical Instructions
13715
13716 // And Instructions
13717 // And Register with Register
13718 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13719 %{
13720 predicate(!UseAPX);
13721 match(Set dst (AndL dst src));
13722 effect(KILL cr);
13723 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13724
13725 format %{ "andq $dst, $src\t# long" %}
13726 ins_encode %{
13727 __ andq($dst$$Register, $src$$Register);
13728 %}
13729 ins_pipe(ialu_reg_reg);
13730 %}
13731
13732 // And Register with Register using New Data Destination (NDD)
13733 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13734 %{
13735 predicate(UseAPX);
13736 match(Set dst (AndL src1 src2));
13737 effect(KILL cr);
13738 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13739
13740 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13741 ins_encode %{
13742 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13743
13744 %}
13745 ins_pipe(ialu_reg_reg);
13746 %}
13747
13748 // And Register with Immediate 255
13749 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13750 %{
13751 match(Set dst (AndL src mask));
13752
13753 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13754 ins_encode %{
13755 // movzbl zeroes out the upper 32-bit and does not need REX.W
13756 __ movzbl($dst$$Register, $src$$Register);
13757 %}
13758 ins_pipe(ialu_reg);
13759 %}
13760
13761 // And Register with Immediate 65535
13762 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13763 %{
13764 match(Set dst (AndL src mask));
13765
13766 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13767 ins_encode %{
13768 // movzwl zeroes out the upper 32-bit and does not need REX.W
13769 __ movzwl($dst$$Register, $src$$Register);
13770 %}
13771 ins_pipe(ialu_reg);
13772 %}
13773
13774 // And Register with Immediate
13775 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13776 %{
13777 predicate(!UseAPX);
13778 match(Set dst (AndL dst src));
13779 effect(KILL cr);
13780 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13781
13782 format %{ "andq $dst, $src\t# long" %}
13783 ins_encode %{
13784 __ andq($dst$$Register, $src$$constant);
13785 %}
13786 ins_pipe(ialu_reg);
13787 %}
13788
13789 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13790 %{
13791 predicate(UseAPX);
13792 match(Set dst (AndL src1 src2));
13793 effect(KILL cr);
13794 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13795
13796 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13797 ins_encode %{
13798 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13799 %}
13800 ins_pipe(ialu_reg);
13801 %}
13802
13803 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13804 %{
13805 predicate(UseAPX);
13806 match(Set dst (AndL (LoadL src1) src2));
13807 effect(KILL cr);
13808 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13809
13810 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13811 ins_encode %{
13812 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13813 %}
13814 ins_pipe(ialu_reg);
13815 %}
13816
13817 // And Register with Memory
13818 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13819 %{
13820 predicate(!UseAPX);
13821 match(Set dst (AndL dst (LoadL src)));
13822 effect(KILL cr);
13823 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13824
13825 ins_cost(150);
13826 format %{ "andq $dst, $src\t# long" %}
13827 ins_encode %{
13828 __ andq($dst$$Register, $src$$Address);
13829 %}
13830 ins_pipe(ialu_reg_mem);
13831 %}
13832
13833 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13834 %{
13835 predicate(UseAPX);
13836 match(Set dst (AndL src1 (LoadL src2)));
13837 effect(KILL cr);
13838 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13839
13840 ins_cost(150);
13841 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13842 ins_encode %{
13843 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13844 %}
13845 ins_pipe(ialu_reg_mem);
13846 %}
13847
13848 // And Memory with Register
13849 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13850 %{
13851 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13852 effect(KILL cr);
13853 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13854
13855 ins_cost(150);
13856 format %{ "andq $dst, $src\t# long" %}
13857 ins_encode %{
13858 __ andq($dst$$Address, $src$$Register);
13859 %}
13860 ins_pipe(ialu_mem_reg);
13861 %}
13862
13863 // And Memory with Immediate
13864 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13865 %{
13866 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13867 effect(KILL cr);
13868 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13869
13870 ins_cost(125);
13871 format %{ "andq $dst, $src\t# long" %}
13872 ins_encode %{
13873 __ andq($dst$$Address, $src$$constant);
13874 %}
13875 ins_pipe(ialu_mem_imm);
13876 %}
13877
13878 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13879 %{
13880 // con should be a pure 64-bit immediate given that not(con) is a power of 2
13881 // because AND/OR works well enough for 8/32-bit values.
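  // e.g. x & ~(1L << 40): the mask does not fit a sign-extended imm32, but the
  // whole and reduces to a single "btrq [dst], 40" that clears bit 40 in place.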
13882 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13883
13884 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13885 effect(KILL cr);
13886
13887 ins_cost(125);
13888 format %{ "btrq $dst, log2(not($con))\t# long" %}
13889 ins_encode %{
13890 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13891 %}
13892 ins_pipe(ialu_mem_imm);
13893 %}
13894
13895 // BMI1 instructions
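// These match the canonical ideal-graph shapes of the BMI1 bit tricks:
//   andn  : dst = ~src1 & src2        (not + and in one instruction)
//   blsi  : dst =  src  & -src        (isolate lowest set bit)
//   blsmsk: dst =  src ^ (src - 1)    (mask up to and including lowest set bit)
//   blsr  : dst =  src  & (src - 1)   (clear lowest set bit)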
13896 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13897 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13898 predicate(UseBMI1Instructions);
13899 effect(KILL cr);
13900 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13901
13902 ins_cost(125);
13903 format %{ "andnq $dst, $src1, $src2" %}
13904
13905 ins_encode %{
13906 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13907 %}
13908 ins_pipe(ialu_reg_mem);
13909 %}
13910
13911 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13912 match(Set dst (AndL (XorL src1 minus_1) src2));
13913 predicate(UseBMI1Instructions);
13914 effect(KILL cr);
13915 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13916
13917 format %{ "andnq $dst, $src1, $src2" %}
13918
13919 ins_encode %{
13920 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13921 %}
13922 ins_pipe(ialu_reg_mem);
13923 %}
13924
13925 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13926 match(Set dst (AndL (SubL imm_zero src) src));
13927 predicate(UseBMI1Instructions);
13928 effect(KILL cr);
13929 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13930
13931 format %{ "blsiq $dst, $src" %}
13932
13933 ins_encode %{
13934 __ blsiq($dst$$Register, $src$$Register);
13935 %}
13936 ins_pipe(ialu_reg);
13937 %}
13938
13939 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13940 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13941 predicate(UseBMI1Instructions);
13942 effect(KILL cr);
13943 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13944
13945 ins_cost(125);
13946 format %{ "blsiq $dst, $src" %}
13947
13948 ins_encode %{
13949 __ blsiq($dst$$Register, $src$$Address);
13950 %}
13951 ins_pipe(ialu_reg_mem);
13952 %}
13953
13954 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13955 %{
13956 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13957 predicate(UseBMI1Instructions);
13958 effect(KILL cr);
13959 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13960
13961 ins_cost(125);
13962 format %{ "blsmskq $dst, $src" %}
13963
13964 ins_encode %{
13965 __ blsmskq($dst$$Register, $src$$Address);
13966 %}
13967 ins_pipe(ialu_reg_mem);
13968 %}
13969
13970 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13971 %{
13972 match(Set dst (XorL (AddL src minus_1) src));
13973 predicate(UseBMI1Instructions);
13974 effect(KILL cr);
13975 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13976
13977 format %{ "blsmskq $dst, $src" %}
13978
13979 ins_encode %{
13980 __ blsmskq($dst$$Register, $src$$Register);
13981 %}
13982
13983 ins_pipe(ialu_reg);
13984 %}
13985
13986 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13987 %{
13988 match(Set dst (AndL (AddL src minus_1) src) );
13989 predicate(UseBMI1Instructions);
13990 effect(KILL cr);
13991 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13992
13993 format %{ "blsrq $dst, $src" %}
13994
13995 ins_encode %{
13996 __ blsrq($dst$$Register, $src$$Register);
13997 %}
13998
13999 ins_pipe(ialu_reg);
14000 %}
14001
14002 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
14003 %{
14004 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
14005 predicate(UseBMI1Instructions);
14006 effect(KILL cr);
14007 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
14008
14009 ins_cost(125);
14010 format %{ "blsrq $dst, $src" %}
14011
14012 ins_encode %{
14013 __ blsrq($dst$$Register, $src$$Address);
14014 %}
14015
14016 ins_pipe(ialu_reg);
14017 %}
14018
14019 // Or Instructions
14020 // Or Register with Register
14021 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14022 %{
14023 predicate(!UseAPX);
14024 match(Set dst (OrL dst src));
14025 effect(KILL cr);
14026 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14027
14028 format %{ "orq $dst, $src\t# long" %}
14029 ins_encode %{
14030 __ orq($dst$$Register, $src$$Register);
14031 %}
14032 ins_pipe(ialu_reg_reg);
14033 %}
14034
14035 // Or Register with Register using New Data Destination (NDD)
14036 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14037 %{
14038 predicate(UseAPX);
14039 match(Set dst (OrL src1 src2));
14040 effect(KILL cr);
14041 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14042
14043 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14044 ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
14048 ins_pipe(ialu_reg_reg);
14049 %}
14050
14051 // Use any_RegP to match R15 (TLS register) without spilling.
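// R15 holds the current JavaThread on x86_64 and is normally excluded from the
// allocatable pointer register class; any_RegP includes it, so CastP2X of the
// thread pointer can be consumed here directly instead of forcing a copy.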
14052 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
14054 effect(KILL cr);
14055 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14056
14057 format %{ "orq $dst, $src\t# long" %}
14058 ins_encode %{
14059 __ orq($dst$$Register, $src$$Register);
14060 %}
14061 ins_pipe(ialu_reg_reg);
14062 %}
14063
14064 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
14066 effect(KILL cr);
14067 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14068
14069 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14070 ins_encode %{
14071 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14072 %}
14073 ins_pipe(ialu_reg_reg);
14074 %}
14075
14076 // Or Register with Immediate
14077 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14078 %{
14079 predicate(!UseAPX);
14080 match(Set dst (OrL dst src));
14081 effect(KILL cr);
14082 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14083
14084 format %{ "orq $dst, $src\t# long" %}
14085 ins_encode %{
14086 __ orq($dst$$Register, $src$$constant);
14087 %}
14088 ins_pipe(ialu_reg);
14089 %}
14090
14091 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14092 %{
14093 predicate(UseAPX);
14094 match(Set dst (OrL src1 src2));
14095 effect(KILL cr);
14096 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14097
14098 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14099 ins_encode %{
14100 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14101 %}
14102 ins_pipe(ialu_reg);
14103 %}
14104
14105 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14106 %{
14107 predicate(UseAPX);
14108 match(Set dst (OrL src1 src2));
14109 effect(KILL cr);
14110 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14111
14112 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14113 ins_encode %{
14114 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14115 %}
14116 ins_pipe(ialu_reg);
14117 %}
14118
// Or Register with Memory and Immediate (NDD)
14120 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14121 %{
14122 predicate(UseAPX);
14123 match(Set dst (OrL (LoadL src1) src2));
14124 effect(KILL cr);
14125 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14126
14127 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14128 ins_encode %{
14129 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14130 %}
14131 ins_pipe(ialu_reg);
14132 %}
14133
14134 // Or Register with Memory
14135 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14136 %{
14137 predicate(!UseAPX);
14138 match(Set dst (OrL dst (LoadL src)));
14139 effect(KILL cr);
14140 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14141
14142 ins_cost(150);
14143 format %{ "orq $dst, $src\t# long" %}
14144 ins_encode %{
14145 __ orq($dst$$Register, $src$$Address);
14146 %}
14147 ins_pipe(ialu_reg_mem);
14148 %}
14149
14150 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14151 %{
14152 predicate(UseAPX);
14153 match(Set dst (OrL src1 (LoadL src2)));
14154 effect(KILL cr);
14155 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14156
14157 ins_cost(150);
14158 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14159 ins_encode %{
14160 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14161 %}
14162 ins_pipe(ialu_reg_mem);
14163 %}
14164
14165 // Or Memory with Register
14166 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14167 %{
14168 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14169 effect(KILL cr);
14170 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14171
14172 ins_cost(150);
14173 format %{ "orq $dst, $src\t# long" %}
14174 ins_encode %{
14175 __ orq($dst$$Address, $src$$Register);
14176 %}
14177 ins_pipe(ialu_mem_reg);
14178 %}
14179
14180 // Or Memory with Immediate
14181 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14182 %{
14183 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14184 effect(KILL cr);
14185 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14186
14187 ins_cost(125);
14188 format %{ "orq $dst, $src\t# long" %}
14189 ins_encode %{
14190 __ orq($dst$$Address, $src$$constant);
14191 %}
14192 ins_pipe(ialu_mem_imm);
14193 %}
14194
14195 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14196 %{
14197 // con should be a pure 64-bit power of 2 immediate
14198 // because AND/OR works well enough for 8/32-bit values.
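  // e.g. x | (1L << 40): the constant does not fit a sign-extended imm32, but
  // the or reduces to a single "btsq [dst], 40" that sets bit 40 in place.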
14199 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14200
14201 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14202 effect(KILL cr);
14203
14204 ins_cost(125);
14205 format %{ "btsq $dst, log2($con)\t# long" %}
14206 ins_encode %{
14207 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14208 %}
14209 ins_pipe(ialu_mem_imm);
14210 %}
14211
14212 // Xor Instructions
14213 // Xor Register with Register
14214 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14215 %{
14216 predicate(!UseAPX);
14217 match(Set dst (XorL dst src));
14218 effect(KILL cr);
14219 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14220
14221 format %{ "xorq $dst, $src\t# long" %}
14222 ins_encode %{
14223 __ xorq($dst$$Register, $src$$Register);
14224 %}
14225 ins_pipe(ialu_reg_reg);
14226 %}
14227
14228 // Xor Register with Register using New Data Destination (NDD)
14229 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14230 %{
14231 predicate(UseAPX);
14232 match(Set dst (XorL src1 src2));
14233 effect(KILL cr);
14234 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14235
14236 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14237 ins_encode %{
14238 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14239 %}
14240 ins_pipe(ialu_reg_reg);
14241 %}
14242
14243 // Xor Register with Immediate -1
14244 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14245 %{
14246 predicate(!UseAPX);
14247 match(Set dst (XorL dst imm));
14248
14249 format %{ "notq $dst" %}
14250 ins_encode %{
14251 __ notq($dst$$Register);
14252 %}
14253 ins_pipe(ialu_reg);
14254 %}
14255
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14257 %{
14258 predicate(UseAPX);
14259 match(Set dst (XorL src imm));
14260 flag(PD::Flag_ndd_demotable_opr1);
14261
14262 format %{ "enotq $dst, $src" %}
14263 ins_encode %{
14264 __ enotq($dst$$Register, $src$$Register);
14265 %}
14266 ins_pipe(ialu_reg);
14267 %}
14268
14269 // Xor Register with Immediate
14270 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14271 %{
  // Exclude -1 here so that xorL_rReg_im1 (notq) is always selected for that value, independent of cost.
14273 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14274 match(Set dst (XorL dst src));
14275 effect(KILL cr);
14276 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14277
14278 format %{ "xorq $dst, $src\t# long" %}
14279 ins_encode %{
14280 __ xorq($dst$$Register, $src$$constant);
14281 %}
14282 ins_pipe(ialu_reg);
14283 %}
14284
14285 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14286 %{
  // Exclude -1 here so that xorL_rReg_im1_ndd (enotq) is always selected for that value, independent of cost.
14288 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14289 match(Set dst (XorL src1 src2));
14290 effect(KILL cr);
14291 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14292
14293 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14294 ins_encode %{
14295 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14296 %}
14297 ins_pipe(ialu_reg);
14298 %}
14299
// Xor Register with Memory and Immediate (NDD)
14301 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14302 %{
14303 predicate(UseAPX);
14304 match(Set dst (XorL (LoadL src1) src2));
14305 effect(KILL cr);
14306 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14307 ins_cost(150);
14308
14309 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14310 ins_encode %{
14311 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14312 %}
14313 ins_pipe(ialu_reg);
14314 %}
14315
14316 // Xor Register with Memory
14317 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14318 %{
14319 predicate(!UseAPX);
14320 match(Set dst (XorL dst (LoadL src)));
14321 effect(KILL cr);
14322 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14323
14324 ins_cost(150);
14325 format %{ "xorq $dst, $src\t# long" %}
14326 ins_encode %{
14327 __ xorq($dst$$Register, $src$$Address);
14328 %}
14329 ins_pipe(ialu_reg_mem);
14330 %}
14331
14332 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14333 %{
14334 predicate(UseAPX);
14335 match(Set dst (XorL src1 (LoadL src2)));
14336 effect(KILL cr);
14337 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14338
14339 ins_cost(150);
14340 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14341 ins_encode %{
14342 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14343 %}
14344 ins_pipe(ialu_reg_mem);
14345 %}
14346
14347 // Xor Memory with Register
14348 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14349 %{
14350 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14351 effect(KILL cr);
14352 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14353
14354 ins_cost(150);
14355 format %{ "xorq $dst, $src\t# long" %}
14356 ins_encode %{
14357 __ xorq($dst$$Address, $src$$Register);
14358 %}
14359 ins_pipe(ialu_mem_reg);
14360 %}
14361
14362 // Xor Memory with Immediate
14363 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14364 %{
14365 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14366 effect(KILL cr);
14367 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14368
14369 ins_cost(125);
14370 format %{ "xorq $dst, $src\t# long" %}
14371 ins_encode %{
14372 __ xorq($dst$$Address, $src$$constant);
14373 %}
14374 ins_pipe(ialu_mem_imm);
14375 %}
14376
14377 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14378 %{
14379 match(Set dst (CmpLTMask p q));
14380 effect(KILL cr);
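  // Produces dst = (p < q) ? -1 : 0: setcc materializes 0/1, then negl turns 1 into -1.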
14381
14382 ins_cost(400);
14383 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
14385 "negl $dst" %}
14386 ins_encode %{
14387 __ cmpl($p$$Register, $q$$Register);
14388 __ setcc(Assembler::less, $dst$$Register);
14389 __ negl($dst$$Register);
14390 %}
14391 ins_pipe(pipe_slow);
14392 %}
14393
14394 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14395 %{
14396 match(Set dst (CmpLTMask dst zero));
14397 effect(KILL cr);
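  // dst = dst >> 31 (arithmetic): -1 when dst < 0, otherwise 0.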
14398
14399 ins_cost(100);
14400 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14401 ins_encode %{
14402 __ sarl($dst$$Register, 31);
14403 %}
14404 ins_pipe(ialu_reg);
14405 %}
14406
14407 /* Better to save a register than avoid a branch */
14408 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14409 %{
14410 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14411 effect(KILL cr);
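  // Computes p = (p - q) + ((p < q) ? y : 0) with a short branch instead of
  // materializing the -1/0 mask and doing the and/add.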
14412 ins_cost(300);
14413 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14414 "jge done\n\t"
14415 "addl $p,$y\n"
14416 "done: " %}
14417 ins_encode %{
14418 Register Rp = $p$$Register;
14419 Register Rq = $q$$Register;
14420 Register Ry = $y$$Register;
14421 Label done;
14422 __ subl(Rp, Rq);
14423 __ jccb(Assembler::greaterEqual, done);
14424 __ addl(Rp, Ry);
14425 __ bind(done);
14426 %}
14427 ins_pipe(pipe_cmplt);
14428 %}
14429
14430 /* Better to save a register than avoid a branch */
14431 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14432 %{
14433 match(Set y (AndI (CmpLTMask p q) y));
14434 effect(KILL cr);
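  // Computes y = (p < q) ? y : 0; a compare plus conditional clear replaces the mask-and.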
14435
14436 ins_cost(300);
14437
14438 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14439 "jlt done\n\t"
14440 "xorl $y, $y\n"
14441 "done: " %}
14442 ins_encode %{
14443 Register Rp = $p$$Register;
14444 Register Rq = $q$$Register;
14445 Register Ry = $y$$Register;
14446 Label done;
14447 __ cmpl(Rp, Rq);
14448 __ jccb(Assembler::less, done);
14449 __ xorl(Ry, Ry);
14450 __ bind(done);
14451 %}
14452 ins_pipe(pipe_cmplt);
14453 %}
14454
14455
14456 //---------- FP Instructions------------------------------------------------
14457
14458 // Really expensive, avoid
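// ucomiss leaves ZF = PF = CF = 1 for an unordered (NaN) operand; the fixup
// sequence below keeps CF but clears ZF/PF, so NaN uniformly reports as
// "below" to consumers of the unordered flags register.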
14459 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14460 %{
14461 match(Set cr (CmpF src1 src2));
14462
14463 ins_cost(500);
14464 format %{ "ucomiss $src1, $src2\n\t"
14465 "jnp,s exit\n\t"
14466 "pushfq\t# saw NaN, set CF\n\t"
14467 "andq [rsp], #0xffffff2b\n\t"
14468 "popfq\n"
14469 "exit:" %}
14470 ins_encode %{
14471 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14472 emit_cmpfp_fixup(masm);
14473 %}
14474 ins_pipe(pipe_slow);
14475 %}
14476
14477 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14478 match(Set cr (CmpF src1 src2));
14479
14480 ins_cost(100);
14481 format %{ "ucomiss $src1, $src2" %}
14482 ins_encode %{
14483 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14484 %}
14485 ins_pipe(pipe_slow);
14486 %}
14487
14488 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14489 match(Set cr (CmpF src1 src2));
14490
14491 ins_cost(100);
14492 format %{ "evucomxss $src1, $src2" %}
14493 ins_encode %{
14494 __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14495 %}
14496 ins_pipe(pipe_slow);
14497 %}
14498
14499 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14500 match(Set cr (CmpF src1 (LoadF src2)));
14501
14502 ins_cost(100);
14503 format %{ "ucomiss $src1, $src2" %}
14504 ins_encode %{
14505 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14506 %}
14507 ins_pipe(pipe_slow);
14508 %}
14509
14510 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14511 match(Set cr (CmpF src1 (LoadF src2)));
14512
14513 ins_cost(100);
14514 format %{ "evucomxss $src1, $src2" %}
14515 ins_encode %{
14516 __ evucomxss($src1$$XMMRegister, $src2$$Address);
14517 %}
14518 ins_pipe(pipe_slow);
14519 %}
14520
14521 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14522 match(Set cr (CmpF src con));
14523
14524 ins_cost(100);
14525 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14526 ins_encode %{
14527 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14528 %}
14529 ins_pipe(pipe_slow);
14530 %}
14531
14532 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14533 match(Set cr (CmpF src con));
14534
14535 ins_cost(100);
14536 format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14537 ins_encode %{
14538 __ evucomxss($src$$XMMRegister, $constantaddress($con));
14539 %}
14540 ins_pipe(pipe_slow);
14541 %}
14542
14543 // Really expensive, avoid
14544 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14545 %{
14546 match(Set cr (CmpD src1 src2));
14547
14548 ins_cost(500);
14549 format %{ "ucomisd $src1, $src2\n\t"
14550 "jnp,s exit\n\t"
14551 "pushfq\t# saw NaN, set CF\n\t"
14552 "andq [rsp], #0xffffff2b\n\t"
14553 "popfq\n"
14554 "exit:" %}
14555 ins_encode %{
14556 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14557 emit_cmpfp_fixup(masm);
14558 %}
14559 ins_pipe(pipe_slow);
14560 %}
14561
14562 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14563 match(Set cr (CmpD src1 src2));
14564
14565 ins_cost(100);
14566 format %{ "ucomisd $src1, $src2 test" %}
14567 ins_encode %{
14568 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14569 %}
14570 ins_pipe(pipe_slow);
14571 %}
14572
14573 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14574 match(Set cr (CmpD src1 src2));
14575
14576 ins_cost(100);
14577 format %{ "evucomxsd $src1, $src2 test" %}
14578 ins_encode %{
14579 __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14580 %}
14581 ins_pipe(pipe_slow);
14582 %}
14583
14584 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14585 match(Set cr (CmpD src1 (LoadD src2)));
14586
14587 ins_cost(100);
14588 format %{ "ucomisd $src1, $src2" %}
14589 ins_encode %{
14590 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14591 %}
14592 ins_pipe(pipe_slow);
14593 %}
14594
14595 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14596 match(Set cr (CmpD src1 (LoadD src2)));
14597
14598 ins_cost(100);
14599 format %{ "evucomxsd $src1, $src2" %}
14600 ins_encode %{
14601 __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14602 %}
14603 ins_pipe(pipe_slow);
14604 %}
14605
14606 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14607 match(Set cr (CmpD src con));
14608 ins_cost(100);
14609 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14610 ins_encode %{
14611 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14612 %}
14613 ins_pipe(pipe_slow);
14614 %}
14615
14616 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14617 match(Set cr (CmpD src con));
14618
14619 ins_cost(100);
14620 format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14621 ins_encode %{
14622 __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14623 %}
14624 ins_pipe(pipe_slow);
14625 %}
14626
14627 // Compare into -1,0,1
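// The emitted sequence yields -1 when src1 < src2 or the operands are
// unordered (NaN), 0 when they are equal, and +1 when src1 > src2.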
14628 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14629 %{
14630 match(Set dst (CmpF3 src1 src2));
14631 effect(KILL cr);
14632
14633 ins_cost(275);
14634 format %{ "ucomiss $src1, $src2\n\t"
14635 "movl $dst, #-1\n\t"
14636 "jp,s done\n\t"
14637 "jb,s done\n\t"
14638 "setne $dst\n\t"
14639 "movzbl $dst, $dst\n"
14640 "done:" %}
14641 ins_encode %{
14642 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14643 emit_cmpfp3(masm, $dst$$Register);
14644 %}
14645 ins_pipe(pipe_slow);
14646 %}
14647
14648 // Compare into -1,0,1
14649 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14650 %{
14651 match(Set dst (CmpF3 src1 (LoadF src2)));
14652 effect(KILL cr);
14653
14654 ins_cost(275);
14655 format %{ "ucomiss $src1, $src2\n\t"
14656 "movl $dst, #-1\n\t"
14657 "jp,s done\n\t"
14658 "jb,s done\n\t"
14659 "setne $dst\n\t"
14660 "movzbl $dst, $dst\n"
14661 "done:" %}
14662 ins_encode %{
14663 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14664 emit_cmpfp3(masm, $dst$$Register);
14665 %}
14666 ins_pipe(pipe_slow);
14667 %}
14668
14669 // Compare into -1,0,1
14670 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14671 match(Set dst (CmpF3 src con));
14672 effect(KILL cr);
14673
14674 ins_cost(275);
14675 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14676 "movl $dst, #-1\n\t"
14677 "jp,s done\n\t"
14678 "jb,s done\n\t"
14679 "setne $dst\n\t"
14680 "movzbl $dst, $dst\n"
14681 "done:" %}
14682 ins_encode %{
14683 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14684 emit_cmpfp3(masm, $dst$$Register);
14685 %}
14686 ins_pipe(pipe_slow);
14687 %}
14688
14689 // Compare into -1,0,1
14690 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14691 %{
14692 match(Set dst (CmpD3 src1 src2));
14693 effect(KILL cr);
14694
14695 ins_cost(275);
14696 format %{ "ucomisd $src1, $src2\n\t"
14697 "movl $dst, #-1\n\t"
14698 "jp,s done\n\t"
14699 "jb,s done\n\t"
14700 "setne $dst\n\t"
14701 "movzbl $dst, $dst\n"
14702 "done:" %}
14703 ins_encode %{
14704 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14705 emit_cmpfp3(masm, $dst$$Register);
14706 %}
14707 ins_pipe(pipe_slow);
14708 %}
14709
14710 // Compare into -1,0,1
14711 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14712 %{
14713 match(Set dst (CmpD3 src1 (LoadD src2)));
14714 effect(KILL cr);
14715
14716 ins_cost(275);
14717 format %{ "ucomisd $src1, $src2\n\t"
14718 "movl $dst, #-1\n\t"
14719 "jp,s done\n\t"
14720 "jb,s done\n\t"
14721 "setne $dst\n\t"
14722 "movzbl $dst, $dst\n"
14723 "done:" %}
14724 ins_encode %{
14725 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14726 emit_cmpfp3(masm, $dst$$Register);
14727 %}
14728 ins_pipe(pipe_slow);
14729 %}
14730
14731 // Compare into -1,0,1
14732 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14733 match(Set dst (CmpD3 src con));
14734 effect(KILL cr);
14735
14736 ins_cost(275);
14737 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14738 "movl $dst, #-1\n\t"
14739 "jp,s done\n\t"
14740 "jb,s done\n\t"
14741 "setne $dst\n\t"
14742 "movzbl $dst, $dst\n"
14743 "done:" %}
14744 ins_encode %{
14745 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14746 emit_cmpfp3(masm, $dst$$Register);
14747 %}
14748 ins_pipe(pipe_slow);
14749 %}
14750
14751 //----------Arithmetic Conversion Instructions---------------------------------
14752
14753 instruct convF2D_reg_reg(regD dst, regF src)
14754 %{
14755 match(Set dst (ConvF2D src));
14756
14757 format %{ "cvtss2sd $dst, $src" %}
14758 ins_encode %{
14759 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14760 %}
14761 ins_pipe(pipe_slow); // XXX
14762 %}
14763
14764 instruct convF2D_reg_mem(regD dst, memory src)
14765 %{
14766 predicate(UseAVX == 0);
14767 match(Set dst (ConvF2D (LoadF src)));
14768
14769 format %{ "cvtss2sd $dst, $src" %}
14770 ins_encode %{
14771 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14772 %}
14773 ins_pipe(pipe_slow); // XXX
14774 %}
14775
14776 instruct convD2F_reg_reg(regF dst, regD src)
14777 %{
14778 match(Set dst (ConvD2F src));
14779
14780 format %{ "cvtsd2ss $dst, $src" %}
14781 ins_encode %{
14782 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14783 %}
14784 ins_pipe(pipe_slow); // XXX
14785 %}
14786
14787 instruct convD2F_reg_mem(regF dst, memory src)
14788 %{
14789 predicate(UseAVX == 0);
14790 match(Set dst (ConvD2F (LoadD src)));
14791
14792 format %{ "cvtsd2ss $dst, $src" %}
14793 ins_encode %{
14794 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14795 %}
14796 ins_pipe(pipe_slow); // XXX
14797 %}
14798
14799 // XXX do mem variants
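// Without AVX10.2, cvttss2si/cvttsd2si return the integer indefinite value
// (0x80000000 / 0x8000000000000000) for NaN and out-of-range inputs, so the
// convertF2I helper has to detect that sentinel and fix the result up to the
// Java semantics (0 for NaN, saturation to MIN/MAX otherwise). The AVX10.2
// saturating converts below (evcvtt*2sis*) produce the Java result directly,
// which is why those forms need no flags KILL or fixup.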
14800 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14801 %{
14802 predicate(!VM_Version::supports_avx10_2());
14803 match(Set dst (ConvF2I src));
14804 effect(KILL cr);
14805 format %{ "convert_f2i $dst, $src" %}
14806 ins_encode %{
14807 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14808 %}
14809 ins_pipe(pipe_slow);
14810 %}
14811
14812 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14813 %{
14814 predicate(VM_Version::supports_avx10_2());
14815 match(Set dst (ConvF2I src));
14816 format %{ "evcvttss2sisl $dst, $src" %}
14817 ins_encode %{
14818 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14819 %}
14820 ins_pipe(pipe_slow);
14821 %}
14822
14823 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14824 %{
14825 predicate(VM_Version::supports_avx10_2());
14826 match(Set dst (ConvF2I (LoadF src)));
14827 format %{ "evcvttss2sisl $dst, $src" %}
14828 ins_encode %{
14829 __ evcvttss2sisl($dst$$Register, $src$$Address);
14830 %}
14831 ins_pipe(pipe_slow);
14832 %}
14833
14834 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14835 %{
14836 predicate(!VM_Version::supports_avx10_2());
14837 match(Set dst (ConvF2L src));
14838 effect(KILL cr);
14839 format %{ "convert_f2l $dst, $src"%}
14840 ins_encode %{
14841 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14842 %}
14843 ins_pipe(pipe_slow);
14844 %}
14845
14846 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14847 %{
14848 predicate(VM_Version::supports_avx10_2());
14849 match(Set dst (ConvF2L src));
14850 format %{ "evcvttss2sisq $dst, $src" %}
14851 ins_encode %{
14852 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14853 %}
14854 ins_pipe(pipe_slow);
14855 %}
14856
14857 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14858 %{
14859 predicate(VM_Version::supports_avx10_2());
14860 match(Set dst (ConvF2L (LoadF src)));
14861 format %{ "evcvttss2sisq $dst, $src" %}
14862 ins_encode %{
14863 __ evcvttss2sisq($dst$$Register, $src$$Address);
14864 %}
14865 ins_pipe(pipe_slow);
14866 %}
14867
14868 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14869 %{
14870 predicate(!VM_Version::supports_avx10_2());
14871 match(Set dst (ConvD2I src));
14872 effect(KILL cr);
14873 format %{ "convert_d2i $dst, $src"%}
14874 ins_encode %{
14875 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14876 %}
14877 ins_pipe(pipe_slow);
14878 %}
14879
14880 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14881 %{
14882 predicate(VM_Version::supports_avx10_2());
14883 match(Set dst (ConvD2I src));
14884 format %{ "evcvttsd2sisl $dst, $src" %}
14885 ins_encode %{
14886 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14887 %}
14888 ins_pipe(pipe_slow);
14889 %}
14890
14891 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14892 %{
14893 predicate(VM_Version::supports_avx10_2());
14894 match(Set dst (ConvD2I (LoadD src)));
14895 format %{ "evcvttsd2sisl $dst, $src" %}
14896 ins_encode %{
14897 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14898 %}
14899 ins_pipe(pipe_slow);
14900 %}
14901
14902 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14903 %{
14904 predicate(!VM_Version::supports_avx10_2());
14905 match(Set dst (ConvD2L src));
14906 effect(KILL cr);
14907 format %{ "convert_d2l $dst, $src"%}
14908 ins_encode %{
14909 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14910 %}
14911 ins_pipe(pipe_slow);
14912 %}
14913
14914 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14915 %{
14916 predicate(VM_Version::supports_avx10_2());
14917 match(Set dst (ConvD2L src));
14918 format %{ "evcvttsd2sisq $dst, $src" %}
14919 ins_encode %{
14920 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14921 %}
14922 ins_pipe(pipe_slow);
14923 %}
14924
14925 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14926 %{
14927 predicate(VM_Version::supports_avx10_2());
14928 match(Set dst (ConvD2L (LoadD src)));
14929 format %{ "evcvttsd2sisq $dst, $src" %}
14930 ins_encode %{
14931 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14932 %}
14933 ins_pipe(pipe_slow);
14934 %}
14935
14936 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14937 %{
14938 match(Set dst (RoundD src));
14939 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14940 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14941 ins_encode %{
14942 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14943 %}
14944 ins_pipe(pipe_slow);
14945 %}
14946
14947 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14948 %{
14949 match(Set dst (RoundF src));
14950 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14951 format %{ "round_float $dst,$src" %}
14952 ins_encode %{
14953 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14954 %}
14955 ins_pipe(pipe_slow);
14956 %}
14957
14958 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14959 %{
14960 predicate(!UseXmmI2F);
14961 match(Set dst (ConvI2F src));
14962
14963 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14964 ins_encode %{
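    // cvtsi2ss only writes the low 32 bits and merges the rest of dst, creating
    // a false dependence on dst's previous contents; with AVX available, clear
    // dst first to break that dependence (the same idiom is used by the other
    // scalar int/long-to-FP conversions below).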
14965 if (UseAVX > 0) {
14966 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14967 }
14968 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14969 %}
14970 ins_pipe(pipe_slow); // XXX
14971 %}
14972
14973 instruct convI2F_reg_mem(regF dst, memory src)
14974 %{
14975 predicate(UseAVX == 0);
14976 match(Set dst (ConvI2F (LoadI src)));
14977
14978 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14979 ins_encode %{
14980 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14981 %}
14982 ins_pipe(pipe_slow); // XXX
14983 %}
14984
14985 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14986 %{
14987 predicate(!UseXmmI2D);
14988 match(Set dst (ConvI2D src));
14989
14990 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14991 ins_encode %{
14992 if (UseAVX > 0) {
14993 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14994 }
14995 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14996 %}
14997 ins_pipe(pipe_slow); // XXX
14998 %}
14999
15000 instruct convI2D_reg_mem(regD dst, memory src)
15001 %{
15002 predicate(UseAVX == 0);
15003 match(Set dst (ConvI2D (LoadI src)));
15004
15005 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
15006 ins_encode %{
15007 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
15008 %}
15009 ins_pipe(pipe_slow); // XXX
15010 %}
15011
15012 instruct convXI2F_reg(regF dst, rRegI src)
15013 %{
15014 predicate(UseXmmI2F);
15015 match(Set dst (ConvI2F src));
15016
15017 format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
15019 ins_encode %{
15020 __ movdl($dst$$XMMRegister, $src$$Register);
15021 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
15022 %}
15023 ins_pipe(pipe_slow); // XXX
15024 %}
15025
15026 instruct convXI2D_reg(regD dst, rRegI src)
15027 %{
15028 predicate(UseXmmI2D);
15029 match(Set dst (ConvI2D src));
15030
15031 format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
15033 ins_encode %{
15034 __ movdl($dst$$XMMRegister, $src$$Register);
15035 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
15036 %}
15037 ins_pipe(pipe_slow); // XXX
15038 %}
15039
15040 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
15041 %{
15042 match(Set dst (ConvL2F src));
15043
15044 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15045 ins_encode %{
15046 if (UseAVX > 0) {
15047 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15048 }
15049 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
15050 %}
15051 ins_pipe(pipe_slow); // XXX
15052 %}
15053
15054 instruct convL2F_reg_mem(regF dst, memory src)
15055 %{
15056 predicate(UseAVX == 0);
15057 match(Set dst (ConvL2F (LoadL src)));
15058
15059 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15060 ins_encode %{
15061 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
15062 %}
15063 ins_pipe(pipe_slow); // XXX
15064 %}
15065
15066 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
15067 %{
15068 match(Set dst (ConvL2D src));
15069
15070 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15071 ins_encode %{
15072 if (UseAVX > 0) {
15073 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15074 }
15075 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
15076 %}
15077 ins_pipe(pipe_slow); // XXX
15078 %}
15079
15080 instruct convL2D_reg_mem(regD dst, memory src)
15081 %{
15082 predicate(UseAVX == 0);
15083 match(Set dst (ConvL2D (LoadL src)));
15084
15085 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15086 ins_encode %{
15087 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15088 %}
15089 ins_pipe(pipe_slow); // XXX
15090 %}
15091
15092 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15093 %{
15094 match(Set dst (ConvI2L src));
15095
15096 ins_cost(125);
15097 format %{ "movslq $dst, $src\t# i2l" %}
15098 ins_encode %{
15099 __ movslq($dst$$Register, $src$$Register);
15100 %}
15101 ins_pipe(ialu_reg_reg);
15102 %}
15103
15104 // Zero-extend convert int to long
15105 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15106 %{
15107 match(Set dst (AndL (ConvI2L src) mask));
15108
15109 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15110 ins_encode %{
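    // A 32-bit mov implicitly zeroes bits 63..32. The x86_64 backend keeps int
    // values zero-extended in their 64-bit registers (every 32-bit operation
    // clears the upper half), so when dst == src no instruction is needed.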
15111 if ($dst$$reg != $src$$reg) {
15112 __ movl($dst$$Register, $src$$Register);
15113 }
15114 %}
15115 ins_pipe(ialu_reg_reg);
15116 %}
15117
15118 // Zero-extend convert int to long
15119 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15120 %{
15121 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15122
15123 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15124 ins_encode %{
15125 __ movl($dst$$Register, $src$$Address);
15126 %}
15127 ins_pipe(ialu_reg_mem);
15128 %}
15129
15130 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15131 %{
15132 match(Set dst (AndL src mask));
15133
15134 format %{ "movl $dst, $src\t# zero-extend long" %}
15135 ins_encode %{
15136 __ movl($dst$$Register, $src$$Register);
15137 %}
15138 ins_pipe(ialu_reg_reg);
15139 %}
15140
15141 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15142 %{
15143 match(Set dst (ConvL2I src));
15144
15145 format %{ "movl $dst, $src\t# l2i" %}
15146 ins_encode %{
15147 __ movl($dst$$Register, $src$$Register);
15148 %}
15149 ins_pipe(ialu_reg_reg);
15150 %}
15151
15152
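// The MoveX2Y rules below are raw bit-pattern moves between the integer and FP
// register files (the Float.floatToRawIntBits / Double.doubleToRawLongBits
// family); depending on where the operand lives they either go through a stack
// slot or use movd/movq directly between GPRs and XMM registers.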
15153 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15154 match(Set dst (MoveF2I src));
15155 effect(DEF dst, USE src);
15156
15157 ins_cost(125);
15158 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15159 ins_encode %{
15160 __ movl($dst$$Register, Address(rsp, $src$$disp));
15161 %}
15162 ins_pipe(ialu_reg_mem);
15163 %}
15164
15165 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15166 match(Set dst (MoveI2F src));
15167 effect(DEF dst, USE src);
15168
15169 ins_cost(125);
15170 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15171 ins_encode %{
15172 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15173 %}
15174 ins_pipe(pipe_slow);
15175 %}
15176
15177 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15178 match(Set dst (MoveD2L src));
15179 effect(DEF dst, USE src);
15180
15181 ins_cost(125);
15182 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15183 ins_encode %{
15184 __ movq($dst$$Register, Address(rsp, $src$$disp));
15185 %}
15186 ins_pipe(ialu_reg_mem);
15187 %}
15188
15189 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15190 predicate(!UseXmmLoadAndClearUpper);
15191 match(Set dst (MoveL2D src));
15192 effect(DEF dst, USE src);
15193
15194 ins_cost(125);
15195 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15196 ins_encode %{
15197 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15198 %}
15199 ins_pipe(pipe_slow);
15200 %}
15201
15202 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15203 predicate(UseXmmLoadAndClearUpper);
15204 match(Set dst (MoveL2D src));
15205 effect(DEF dst, USE src);
15206
15207 ins_cost(125);
15208 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15209 ins_encode %{
15210 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15211 %}
15212 ins_pipe(pipe_slow);
15213 %}
15214
15215
15216 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15217 match(Set dst (MoveF2I src));
15218 effect(DEF dst, USE src);
15219
15220 ins_cost(95); // XXX
15221 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15222 ins_encode %{
15223 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15224 %}
15225 ins_pipe(pipe_slow);
15226 %}
15227
15228 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15229 match(Set dst (MoveI2F src));
15230 effect(DEF dst, USE src);
15231
15232 ins_cost(100);
15233 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15234 ins_encode %{
15235 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15236 %}
15237 ins_pipe( ialu_mem_reg );
15238 %}
15239
15240 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15241 match(Set dst (MoveD2L src));
15242 effect(DEF dst, USE src);
15243
15244 ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
15246 ins_encode %{
15247 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15248 %}
15249 ins_pipe(pipe_slow);
15250 %}
15251
15252 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15253 match(Set dst (MoveL2D src));
15254 effect(DEF dst, USE src);
15255
15256 ins_cost(100);
15257 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15258 ins_encode %{
15259 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15260 %}
15261 ins_pipe(ialu_mem_reg);
15262 %}
15263
15264 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15265 match(Set dst (MoveF2I src));
15266 effect(DEF dst, USE src);
15267 ins_cost(85);
15268 format %{ "movd $dst,$src\t# MoveF2I" %}
15269 ins_encode %{
15270 __ movdl($dst$$Register, $src$$XMMRegister);
15271 %}
15272 ins_pipe( pipe_slow );
15273 %}
15274
15275 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15276 match(Set dst (MoveD2L src));
15277 effect(DEF dst, USE src);
15278 ins_cost(85);
15279 format %{ "movd $dst,$src\t# MoveD2L" %}
15280 ins_encode %{
15281 __ movdq($dst$$Register, $src$$XMMRegister);
15282 %}
15283 ins_pipe( pipe_slow );
15284 %}
15285
15286 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15287 match(Set dst (MoveI2F src));
15288 effect(DEF dst, USE src);
15289 ins_cost(100);
15290 format %{ "movd $dst,$src\t# MoveI2F" %}
15291 ins_encode %{
15292 __ movdl($dst$$XMMRegister, $src$$Register);
15293 %}
15294 ins_pipe( pipe_slow );
15295 %}
15296
15297 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15298 match(Set dst (MoveL2D src));
15299 effect(DEF dst, USE src);
15300 ins_cost(100);
15301 format %{ "movd $dst,$src\t# MoveL2D" %}
15302 ins_encode %{
15303 __ movdq($dst$$XMMRegister, $src$$Register);
15304 %}
15305 ins_pipe( pipe_slow );
15306 %}
15307
15308
15309 // Fast clearing of an array
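// The ClearArray rules below come in small vs. large variants (is_large) and,
// orthogonally, *_word_copy variants that pass word_copy=true to clear_mem and
// store the fill value only in whole 8-byte words (they never use the
// byte-granular "rep stosb" path). The AVX512 (_evex) forms additionally carry
// a kReg temp used by clear_mem on AVX512 targets; the split is keyed on
// UseAVX <= 2 versus UseAVX > 2.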
// Small non-constant length ClearArray for non-AVX512 targets.
15311 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15312 Universe dummy, rFlagsReg cr)
15313 %{
15314 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15315 match(Set dummy (ClearArray (Binary cnt base) val));
15316 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15317
15318 format %{ $$template
15319 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15320 $$emit$$"jg LARGE\n\t"
15321 $$emit$$"dec rcx\n\t"
15322 $$emit$$"js DONE\t# Zero length\n\t"
15323 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15324 $$emit$$"dec rcx\n\t"
15325 $$emit$$"jge LOOP\n\t"
15326 $$emit$$"jmp DONE\n\t"
15327 $$emit$$"# LARGE:\n\t"
15328 if (UseFastStosb) {
15329 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15330 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15331 } else if (UseXMMForObjInit) {
15332 $$emit$$"movdq $tmp, $val\n\t"
15333 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15334 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15335 $$emit$$"jmpq L_zero_64_bytes\n\t"
15336 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15337 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15338 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15339 $$emit$$"add 0x40,rax\n\t"
15340 $$emit$$"# L_zero_64_bytes:\n\t"
15341 $$emit$$"sub 0x8,rcx\n\t"
15342 $$emit$$"jge L_loop\n\t"
15343 $$emit$$"add 0x4,rcx\n\t"
15344 $$emit$$"jl L_tail\n\t"
15345 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15346 $$emit$$"add 0x20,rax\n\t"
15347 $$emit$$"sub 0x4,rcx\n\t"
15348 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15349 $$emit$$"add 0x4,rcx\n\t"
15350 $$emit$$"jle L_end\n\t"
15351 $$emit$$"dec rcx\n\t"
15352 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15353 $$emit$$"vmovq xmm0,(rax)\n\t"
15354 $$emit$$"add 0x8,rax\n\t"
15355 $$emit$$"dec rcx\n\t"
15356 $$emit$$"jge L_sloop\n\t"
15357 $$emit$$"# L_end:\n\t"
15358 } else {
15359 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15360 }
15361 $$emit$$"# DONE"
15362 %}
15363 ins_encode %{
15364 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15365 $tmp$$XMMRegister, false, false);
15366 %}
15367 ins_pipe(pipe_slow);
15368 %}
15369
15370 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15371 Universe dummy, rFlagsReg cr)
15372 %{
15373 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15374 match(Set dummy (ClearArray (Binary cnt base) val));
15375 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15376
15377 format %{ $$template
15378 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15379 $$emit$$"jg LARGE\n\t"
15380 $$emit$$"dec rcx\n\t"
15381 $$emit$$"js DONE\t# Zero length\n\t"
15382 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15383 $$emit$$"dec rcx\n\t"
15384 $$emit$$"jge LOOP\n\t"
15385 $$emit$$"jmp DONE\n\t"
15386 $$emit$$"# LARGE:\n\t"
15387 if (UseXMMForObjInit) {
15388 $$emit$$"movdq $tmp, $val\n\t"
15389 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15390 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15391 $$emit$$"jmpq L_zero_64_bytes\n\t"
15392 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15393 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15394 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15395 $$emit$$"add 0x40,rax\n\t"
15396 $$emit$$"# L_zero_64_bytes:\n\t"
15397 $$emit$$"sub 0x8,rcx\n\t"
15398 $$emit$$"jge L_loop\n\t"
15399 $$emit$$"add 0x4,rcx\n\t"
15400 $$emit$$"jl L_tail\n\t"
15401 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15402 $$emit$$"add 0x20,rax\n\t"
15403 $$emit$$"sub 0x4,rcx\n\t"
15404 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15405 $$emit$$"add 0x4,rcx\n\t"
15406 $$emit$$"jle L_end\n\t"
15407 $$emit$$"dec rcx\n\t"
15408 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15409 $$emit$$"vmovq xmm0,(rax)\n\t"
15410 $$emit$$"add 0x8,rax\n\t"
15411 $$emit$$"dec rcx\n\t"
15412 $$emit$$"jge L_sloop\n\t"
15413 $$emit$$"# L_end:\n\t"
15414 } else {
15415 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15416 }
15417 $$emit$$"# DONE"
15418 %}
15419 ins_encode %{
15420 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15421 $tmp$$XMMRegister, false, true);
15422 %}
15423 ins_pipe(pipe_slow);
15424 %}
15425
15426 // Small non-constant length ClearArray for AVX512 targets.
15427 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15428 Universe dummy, rFlagsReg cr)
15429 %{
15430 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15431 match(Set dummy (ClearArray (Binary cnt base) val));
15432 ins_cost(125);
15433 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15434
15435 format %{ $$template
15436 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15437 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15438 $$emit$$"jg LARGE\n\t"
15439 $$emit$$"dec rcx\n\t"
15440 $$emit$$"js DONE\t# Zero length\n\t"
15441 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15442 $$emit$$"dec rcx\n\t"
15443 $$emit$$"jge LOOP\n\t"
15444 $$emit$$"jmp DONE\n\t"
15445 $$emit$$"# LARGE:\n\t"
15446 if (UseFastStosb) {
15447 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15448 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15449 } else if (UseXMMForObjInit) {
15450 $$emit$$"mov rdi,rax\n\t"
15451 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15452 $$emit$$"jmpq L_zero_64_bytes\n\t"
15453 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15454 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15455 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15456 $$emit$$"add 0x40,rax\n\t"
15457 $$emit$$"# L_zero_64_bytes:\n\t"
15458 $$emit$$"sub 0x8,rcx\n\t"
15459 $$emit$$"jge L_loop\n\t"
15460 $$emit$$"add 0x4,rcx\n\t"
15461 $$emit$$"jl L_tail\n\t"
15462 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15463 $$emit$$"add 0x20,rax\n\t"
15464 $$emit$$"sub 0x4,rcx\n\t"
15465 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15466 $$emit$$"add 0x4,rcx\n\t"
15467 $$emit$$"jle L_end\n\t"
15468 $$emit$$"dec rcx\n\t"
15469 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15470 $$emit$$"vmovq xmm0,(rax)\n\t"
15471 $$emit$$"add 0x8,rax\n\t"
15472 $$emit$$"dec rcx\n\t"
15473 $$emit$$"jge L_sloop\n\t"
15474 $$emit$$"# L_end:\n\t"
15475 } else {
15476 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15477 }
15478 $$emit$$"# DONE"
15479 %}
15480 ins_encode %{
15481 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15482 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15483 %}
15484 ins_pipe(pipe_slow);
15485 %}
15486
15487 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15488 Universe dummy, rFlagsReg cr)
15489 %{
15490 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15491 match(Set dummy (ClearArray (Binary cnt base) val));
15492 ins_cost(125);
15493 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15494
15495 format %{ $$template
15496 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15497 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15498 $$emit$$"jg LARGE\n\t"
15499 $$emit$$"dec rcx\n\t"
15500 $$emit$$"js DONE\t# Zero length\n\t"
15501 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15502 $$emit$$"dec rcx\n\t"
15503 $$emit$$"jge LOOP\n\t"
15504 $$emit$$"jmp DONE\n\t"
15505 $$emit$$"# LARGE:\n\t"
15506 if (UseFastStosb) {
15507 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15508 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15509 } else if (UseXMMForObjInit) {
15510 $$emit$$"mov rdi,rax\n\t"
15511 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15512 $$emit$$"jmpq L_zero_64_bytes\n\t"
15513 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15514 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15515 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15516 $$emit$$"add 0x40,rax\n\t"
15517 $$emit$$"# L_zero_64_bytes:\n\t"
15518 $$emit$$"sub 0x8,rcx\n\t"
15519 $$emit$$"jge L_loop\n\t"
15520 $$emit$$"add 0x4,rcx\n\t"
15521 $$emit$$"jl L_tail\n\t"
15522 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15523 $$emit$$"add 0x20,rax\n\t"
15524 $$emit$$"sub 0x4,rcx\n\t"
15525 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15526 $$emit$$"add 0x4,rcx\n\t"
15527 $$emit$$"jle L_end\n\t"
15528 $$emit$$"dec rcx\n\t"
15529 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15530 $$emit$$"vmovq xmm0,(rax)\n\t"
15531 $$emit$$"add 0x8,rax\n\t"
15532 $$emit$$"dec rcx\n\t"
15533 $$emit$$"jge L_sloop\n\t"
15534 $$emit$$"# L_end:\n\t"
15535 } else {
15536 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15537 }
15538 $$emit$$"# DONE"
15539 %}
15540 ins_encode %{
15541 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15542 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15543 %}
15544 ins_pipe(pipe_slow);
15545 %}
15546
15547 // Large non-constant length ClearArray for non-AVX512 targets.
15548 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15549 Universe dummy, rFlagsReg cr)
15550 %{
15551 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15552 match(Set dummy (ClearArray (Binary cnt base) val));
15553 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15554
15555 format %{ $$template
15556 if (UseFastStosb) {
15557 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15558 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15559 } else if (UseXMMForObjInit) {
15560 $$emit$$"movdq $tmp, $val\n\t"
15561 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15562 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15563 $$emit$$"jmpq L_zero_64_bytes\n\t"
15564 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15565 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15566 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15567 $$emit$$"add 0x40,rax\n\t"
15568 $$emit$$"# L_zero_64_bytes:\n\t"
15569 $$emit$$"sub 0x8,rcx\n\t"
15570 $$emit$$"jge L_loop\n\t"
15571 $$emit$$"add 0x4,rcx\n\t"
15572 $$emit$$"jl L_tail\n\t"
15573 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15574 $$emit$$"add 0x20,rax\n\t"
15575 $$emit$$"sub 0x4,rcx\n\t"
15576 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15577 $$emit$$"add 0x4,rcx\n\t"
15578 $$emit$$"jle L_end\n\t"
15579 $$emit$$"dec rcx\n\t"
15580 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15581 $$emit$$"vmovq xmm0,(rax)\n\t"
15582 $$emit$$"add 0x8,rax\n\t"
15583 $$emit$$"dec rcx\n\t"
15584 $$emit$$"jge L_sloop\n\t"
15585 $$emit$$"# L_end:\n\t"
15586 } else {
15587 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15588 }
15589 %}
15590 ins_encode %{
15591 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15592 $tmp$$XMMRegister, true, false);
15593 %}
15594 ins_pipe(pipe_slow);
15595 %}
15596
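// Large non-constant length ClearArray (word copies only) for non-AVX512 targets.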
15597 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15598 Universe dummy, rFlagsReg cr)
15599 %{
15600 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15601 match(Set dummy (ClearArray (Binary cnt base) val));
15602 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15603
15604 format %{ $$template
15605 if (UseXMMForObjInit) {
15606 $$emit$$"movdq $tmp, $val\n\t"
15607 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15608 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15609 $$emit$$"jmpq L_zero_64_bytes\n\t"
15610 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15611 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15612 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15613 $$emit$$"add 0x40,rax\n\t"
15614 $$emit$$"# L_zero_64_bytes:\n\t"
15615 $$emit$$"sub 0x8,rcx\n\t"
15616 $$emit$$"jge L_loop\n\t"
15617 $$emit$$"add 0x4,rcx\n\t"
15618 $$emit$$"jl L_tail\n\t"
15619 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15620 $$emit$$"add 0x20,rax\n\t"
15621 $$emit$$"sub 0x4,rcx\n\t"
15622 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15623 $$emit$$"add 0x4,rcx\n\t"
15624 $$emit$$"jle L_end\n\t"
15625 $$emit$$"dec rcx\n\t"
15626 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15627 $$emit$$"vmovq xmm0,(rax)\n\t"
15628 $$emit$$"add 0x8,rax\n\t"
15629 $$emit$$"dec rcx\n\t"
15630 $$emit$$"jge L_sloop\n\t"
15631 $$emit$$"# L_end:\n\t"
15632 } else {
15633 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15634 }
15635 %}
15636 ins_encode %{
15637 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15638 $tmp$$XMMRegister, true, true);
15639 %}
15640 ins_pipe(pipe_slow);
15641 %}
15642
15643 // Large non-constant length ClearArray for AVX512 targets.
15644 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15645 Universe dummy, rFlagsReg cr)
15646 %{
15647 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15648 match(Set dummy (ClearArray (Binary cnt base) val));
15649 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15650
15651 format %{ $$template
15652 if (UseFastStosb) {
15653 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15654 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15655 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15656 } else if (UseXMMForObjInit) {
15657 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15658 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15659 $$emit$$"jmpq L_zero_64_bytes\n\t"
15660 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15661 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15662 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15663 $$emit$$"add 0x40,rax\n\t"
15664 $$emit$$"# L_zero_64_bytes:\n\t"
15665 $$emit$$"sub 0x8,rcx\n\t"
15666 $$emit$$"jge L_loop\n\t"
15667 $$emit$$"add 0x4,rcx\n\t"
15668 $$emit$$"jl L_tail\n\t"
15669 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15670 $$emit$$"add 0x20,rax\n\t"
15671 $$emit$$"sub 0x4,rcx\n\t"
15672 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15673 $$emit$$"add 0x4,rcx\n\t"
15674 $$emit$$"jle L_end\n\t"
15675 $$emit$$"dec rcx\n\t"
15676 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15677 $$emit$$"vmovq xmm0,(rax)\n\t"
15678 $$emit$$"add 0x8,rax\n\t"
15679 $$emit$$"dec rcx\n\t"
15680 $$emit$$"jge L_sloop\n\t"
15681 $$emit$$"# L_end:\n\t"
15682 } else {
15683 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15684 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15685 }
15686 %}
15687 ins_encode %{
15688 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15689 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15690 %}
15691 ins_pipe(pipe_slow);
15692 %}
15693
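// Large non-constant length ClearArray (word copies only) for AVX512 targets.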
15694 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15695 Universe dummy, rFlagsReg cr)
15696 %{
15697 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15698 match(Set dummy (ClearArray (Binary cnt base) val));
15699 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15700
15701 format %{ $$template
15702 if (UseFastStosb) {
15703 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15704 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15705 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15706 } else if (UseXMMForObjInit) {
15707 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15708 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15709 $$emit$$"jmpq L_zero_64_bytes\n\t"
15710 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15711 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15712 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15713 $$emit$$"add 0x40,rax\n\t"
15714 $$emit$$"# L_zero_64_bytes:\n\t"
15715 $$emit$$"sub 0x8,rcx\n\t"
15716 $$emit$$"jge L_loop\n\t"
15717 $$emit$$"add 0x4,rcx\n\t"
15718 $$emit$$"jl L_tail\n\t"
15719 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15720 $$emit$$"add 0x20,rax\n\t"
15721 $$emit$$"sub 0x4,rcx\n\t"
15722 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15723 $$emit$$"add 0x4,rcx\n\t"
15724 $$emit$$"jle L_end\n\t"
15725 $$emit$$"dec rcx\n\t"
15726 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15727 $$emit$$"vmovq xmm0,(rax)\n\t"
15728 $$emit$$"add 0x8,rax\n\t"
15729 $$emit$$"dec rcx\n\t"
15730 $$emit$$"jge L_sloop\n\t"
15731 $$emit$$"# L_end:\n\t"
15732 } else {
15733 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15734 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15735 }
15736 %}
15737 ins_encode %{
15738 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15739 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15740 %}
15741 ins_pipe(pipe_slow);
15742 %}
15743
15744 // Small constant length ClearArray for AVX512 targets.
15745 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15746 %{
15747 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15748 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15749 match(Set dummy (ClearArray (Binary cnt base) val));
15750 ins_cost(100);
15751 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
format %{ "clear_mem_imm $base, $cnt" %}
15753 ins_encode %{
15754 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15755 %}
15756 ins_pipe(pipe_slow);
15757 %}
15758
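// String compare intrinsics. Each encoding pair (LL, UU, LU, UL) has a plain
// flavor and an _evex flavor; the _evex flavor is selected when AVX-512 VL+BW is
// available and passes an opmask temporary (ktmp) down to the macro assembler.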
15759 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15760 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15761 %{
15762 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15763 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15764 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15765
15766 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15767 ins_encode %{
15768 __ string_compare($str1$$Register, $str2$$Register,
15769 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15770 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15771 %}
15772 ins_pipe( pipe_slow );
15773 %}
15774
15775 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15776 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15777 %{
15778 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15779 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15780 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15781
15782 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15783 ins_encode %{
15784 __ string_compare($str1$$Register, $str2$$Register,
15785 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15786 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15787 %}
15788 ins_pipe( pipe_slow );
15789 %}
15790
15791 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15792 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15793 %{
15794 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15795 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15796 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15797
15798 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15799 ins_encode %{
15800 __ string_compare($str1$$Register, $str2$$Register,
15801 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15802 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15803 %}
15804 ins_pipe( pipe_slow );
15805 %}
15806
15807 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15808 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15809 %{
15810 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15811 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15812 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15813
15814 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15815 ins_encode %{
15816 __ string_compare($str1$$Register, $str2$$Register,
15817 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15818 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15819 %}
15820 ins_pipe( pipe_slow );
15821 %}
15822
15823 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15824 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15825 %{
15826 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15827 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15828 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15829
15830 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15831 ins_encode %{
15832 __ string_compare($str1$$Register, $str2$$Register,
15833 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15834 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15835 %}
15836 ins_pipe( pipe_slow );
15837 %}
15838
15839 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15840 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15841 %{
15842 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15843 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15844 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15845
15846 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15847 ins_encode %{
15848 __ string_compare($str1$$Register, $str2$$Register,
15849 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15850 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15851 %}
15852 ins_pipe( pipe_slow );
15853 %}
15854
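// Note: for the UL encoding the operands are passed to string_compare in swapped
// order (str2/cnt2 before str1/cnt1).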
15855 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15856 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15857 %{
15858 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15859 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15860 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15861
15862 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15863 ins_encode %{
15864 __ string_compare($str2$$Register, $str1$$Register,
15865 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15866 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15867 %}
15868 ins_pipe( pipe_slow );
15869 %}
15870
15871 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15872 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15873 %{
15874 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15875 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15876 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15877
15878 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15879 ins_encode %{
15880 __ string_compare($str2$$Register, $str1$$Register,
15881 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15882 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15883 %}
15884 ins_pipe( pipe_slow );
15885 %}
15886
15887 // fast search of substring with known size.
15888 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15889 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15890 %{
15891 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15892 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15893 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15894
15895 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15896 ins_encode %{
15897 int icnt2 = (int)$int_cnt2$$constant;
15898 if (icnt2 >= 16) {
15899 // IndexOf for constant substrings with size >= 16 elements
// which don't need to be loaded through the stack.
15901 __ string_indexofC8($str1$$Register, $str2$$Register,
15902 $cnt1$$Register, $cnt2$$Register,
15903 icnt2, $result$$Register,
15904 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15905 } else {
// Small strings are loaded through the stack if they cross a page boundary.
15907 __ string_indexof($str1$$Register, $str2$$Register,
15908 $cnt1$$Register, $cnt2$$Register,
15909 icnt2, $result$$Register,
15910 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15911 }
15912 %}
15913 ins_pipe( pipe_slow );
15914 %}
15915
15916 // fast search of substring with known size.
15917 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15918 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15919 %{
15920 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15921 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15922 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15923
15924 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15925 ins_encode %{
15926 int icnt2 = (int)$int_cnt2$$constant;
15927 if (icnt2 >= 8) {
15928 // IndexOf for constant substrings with size >= 8 elements
// which don't need to be loaded through the stack.
15930 __ string_indexofC8($str1$$Register, $str2$$Register,
15931 $cnt1$$Register, $cnt2$$Register,
15932 icnt2, $result$$Register,
15933 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15934 } else {
// Small strings are loaded through the stack if they cross a page boundary.
15936 __ string_indexof($str1$$Register, $str2$$Register,
15937 $cnt1$$Register, $cnt2$$Register,
15938 icnt2, $result$$Register,
15939 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15940 }
15941 %}
15942 ins_pipe( pipe_slow );
15943 %}
15944
15945 // fast search of substring with known size.
15946 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15947 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15948 %{
15949 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15950 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15951 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15952
15953 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15954 ins_encode %{
15955 int icnt2 = (int)$int_cnt2$$constant;
15956 if (icnt2 >= 8) {
15957 // IndexOf for constant substrings with size >= 8 elements
// which don't need to be loaded through the stack.
15959 __ string_indexofC8($str1$$Register, $str2$$Register,
15960 $cnt1$$Register, $cnt2$$Register,
15961 icnt2, $result$$Register,
15962 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15963 } else {
// Small strings are loaded through the stack if they cross a page boundary.
15965 __ string_indexof($str1$$Register, $str2$$Register,
15966 $cnt1$$Register, $cnt2$$Register,
15967 icnt2, $result$$Register,
15968 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15969 }
15970 %}
15971 ins_pipe( pipe_slow );
15972 %}
15973
15974 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15975 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15976 %{
15977 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15978 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15979 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15980
15981 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15982 ins_encode %{
15983 __ string_indexof($str1$$Register, $str2$$Register,
15984 $cnt1$$Register, $cnt2$$Register,
15985 (-1), $result$$Register,
15986 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15987 %}
15988 ins_pipe( pipe_slow );
15989 %}
15990
15991 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15992 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15993 %{
15994 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15995 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15996 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15997
15998 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15999 ins_encode %{
16000 __ string_indexof($str1$$Register, $str2$$Register,
16001 $cnt1$$Register, $cnt2$$Register,
16002 (-1), $result$$Register,
16003 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
16004 %}
16005 ins_pipe( pipe_slow );
16006 %}
16007
16008 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
16009 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
16010 %{
16011 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
16012 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16013 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
16014
16015 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
16016 ins_encode %{
16017 __ string_indexof($str1$$Register, $str2$$Register,
16018 $cnt1$$Register, $cnt2$$Register,
16019 (-1), $result$$Register,
16020 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
16021 %}
16022 ins_pipe( pipe_slow );
16023 %}
16024
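// IndexOf of a single UTF-16 character (SSE4.2); the Latin1 variant follows.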
16025 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16026 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16027 %{
16028 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
16029 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16030 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16031 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
16032 ins_encode %{
16033 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16034 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16035 %}
16036 ins_pipe( pipe_slow );
16037 %}
16038
16039 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16040 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16041 %{
16042 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
16043 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16044 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
format %{ "StringLatin1 IndexOf byte[] $str1,$cnt1,$ch -> $result // KILL all" %}
16046 ins_encode %{
16047 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16048 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16049 %}
16050 ins_pipe( pipe_slow );
16051 %}
16052
16053 // fast string equals
16054 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16055 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16056 %{
16057 predicate(!VM_Version::supports_avx512vlbw());
16058 match(Set result (StrEquals (Binary str1 str2) cnt));
16059 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16060
16061 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16062 ins_encode %{
16063 __ arrays_equals(false, $str1$$Register, $str2$$Register,
16064 $cnt$$Register, $result$$Register, $tmp3$$Register,
16065 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16066 %}
16067 ins_pipe( pipe_slow );
16068 %}
16069
16070 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16071 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
16072 %{
16073 predicate(VM_Version::supports_avx512vlbw());
16074 match(Set result (StrEquals (Binary str1 str2) cnt));
16075 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16076
16077 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16078 ins_encode %{
16079 __ arrays_equals(false, $str1$$Register, $str2$$Register,
16080 $cnt$$Register, $result$$Register, $tmp3$$Register,
16081 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16082 %}
16083 ins_pipe( pipe_slow );
16084 %}
16085
16086 // fast array equals
16087 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16088 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16089 %{
16090 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16091 match(Set result (AryEq ary1 ary2));
16092 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16093
16094 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16095 ins_encode %{
16096 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16097 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16098 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16099 %}
16100 ins_pipe( pipe_slow );
16101 %}
16102
16103 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16104 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16105 %{
16106 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16107 match(Set result (AryEq ary1 ary2));
16108 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16109
16110 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16111 ins_encode %{
16112 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16113 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16114 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16115 %}
16116 ins_pipe( pipe_slow );
16117 %}
16118
16119 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16120 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16121 %{
16122 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16123 match(Set result (AryEq ary1 ary2));
16124 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16125
16126 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16127 ins_encode %{
16128 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16129 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16130 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
16131 %}
16132 ins_pipe( pipe_slow );
16133 %}
16134
16135 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16136 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16137 %{
16138 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16139 match(Set result (AryEq ary1 ary2));
16140 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16141
16142 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16143 ins_encode %{
16144 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16145 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16146 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16147 %}
16148 ins_pipe( pipe_slow );
16149 %}
16150
16151 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16152 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16153 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16154 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16155 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16156 %{
16157 predicate(UseAVX >= 2);
16158 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16159 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16160 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16161 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16162 USE basic_type, KILL cr);
16163
16164 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
16165 ins_encode %{
16166 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16167 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16168 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16169 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16170 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16171 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16172 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16173 %}
16174 ins_pipe( pipe_slow );
16175 %}
16176
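// Count the leading non-negative bytes of a byte[] (CountPositives). The _evex
// flavor requires AVX-512 VL+BW and BMI2 and uses two opmask temporaries.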
16177 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16179 %{
16180 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16181 match(Set result (CountPositives ary1 len));
16182 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16183
16184 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16185 ins_encode %{
16186 __ count_positives($ary1$$Register, $len$$Register,
16187 $result$$Register, $tmp3$$Register,
16188 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16189 %}
16190 ins_pipe( pipe_slow );
16191 %}
16192
16193 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16195 %{
16196 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16197 match(Set result (CountPositives ary1 len));
16198 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16199
16200 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16201 ins_encode %{
16202 __ count_positives($ary1$$Register, $len$$Register,
16203 $result$$Register, $tmp3$$Register,
16204 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16205 %}
16206 ins_pipe( pipe_slow );
16207 %}
16208
16209 // fast char[] to byte[] compression
16210 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16211 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16212 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16213 match(Set result (StrCompressedCopy src (Binary dst len)));
16214 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16215 USE_KILL len, KILL tmp5, KILL cr);
16216
16217 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16218 ins_encode %{
16219 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16220 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16221 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16222 knoreg, knoreg);
16223 %}
16224 ins_pipe( pipe_slow );
16225 %}
16226
16227 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16228 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16229 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16230 match(Set result (StrCompressedCopy src (Binary dst len)));
16231 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16232 USE_KILL len, KILL tmp5, KILL cr);
16233
16234 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16235 ins_encode %{
16236 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16237 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16238 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16239 $ktmp1$$KRegister, $ktmp2$$KRegister);
16240 %}
16241 ins_pipe( pipe_slow );
16242 %}

// fast byte[] to char[] inflation
16244 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16245 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16246 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16247 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16248 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16249
16250 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16251 ins_encode %{
16252 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16253 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16254 %}
16255 ins_pipe( pipe_slow );
16256 %}
16257
16258 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16259 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16260 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16261 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16262 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16263
16264 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16265 ins_encode %{
16266 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16267 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16268 %}
16269 ins_pipe( pipe_slow );
16270 %}
16271
16272 // encode char[] to byte[] in ISO_8859_1
16273 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16274 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16275 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16276 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16277 match(Set result (EncodeISOArray src (Binary dst len)));
16278 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16279
16280 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16281 ins_encode %{
16282 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16283 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16284 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16285 %}
16286 ins_pipe( pipe_slow );
16287 %}
16288
16289 // encode char[] to byte[] in ASCII
16290 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16291 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16292 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16293 predicate(((EncodeISOArrayNode*)n)->is_ascii());
16294 match(Set result (EncodeISOArray src (Binary dst len)));
16295 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16296
16297 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16298 ins_encode %{
16299 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16300 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16301 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16302 %}
16303 ins_pipe( pipe_slow );
16304 %}
16305
16306 //----------Overflow Math Instructions-----------------------------------------
16307
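// These instructions produce only a flags (condition code) result; the arithmetic
// value is discarded, so operands that the underlying instruction overwrites are
// marked USE_KILL.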
16308 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16309 %{
16310 match(Set cr (OverflowAddI op1 op2));
16311 effect(DEF cr, USE_KILL op1, USE op2);
16312
16313 format %{ "addl $op1, $op2\t# overflow check int" %}
16314
16315 ins_encode %{
16316 __ addl($op1$$Register, $op2$$Register);
16317 %}
16318 ins_pipe(ialu_reg_reg);
16319 %}
16320
16321 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16322 %{
16323 match(Set cr (OverflowAddI op1 op2));
16324 effect(DEF cr, USE_KILL op1, USE op2);
16325
16326 format %{ "addl $op1, $op2\t# overflow check int" %}
16327
16328 ins_encode %{
16329 __ addl($op1$$Register, $op2$$constant);
16330 %}
16331 ins_pipe(ialu_reg_reg);
16332 %}
16333
16334 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16335 %{
16336 match(Set cr (OverflowAddL op1 op2));
16337 effect(DEF cr, USE_KILL op1, USE op2);
16338
16339 format %{ "addq $op1, $op2\t# overflow check long" %}
16340 ins_encode %{
16341 __ addq($op1$$Register, $op2$$Register);
16342 %}
16343 ins_pipe(ialu_reg_reg);
16344 %}
16345
16346 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16347 %{
16348 match(Set cr (OverflowAddL op1 op2));
16349 effect(DEF cr, USE_KILL op1, USE op2);
16350
16351 format %{ "addq $op1, $op2\t# overflow check long" %}
16352 ins_encode %{
16353 __ addq($op1$$Register, $op2$$constant);
16354 %}
16355 ins_pipe(ialu_reg_reg);
16356 %}
16357
16358 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16359 %{
16360 match(Set cr (OverflowSubI op1 op2));
16361
16362 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16363 ins_encode %{
16364 __ cmpl($op1$$Register, $op2$$Register);
16365 %}
16366 ins_pipe(ialu_reg_reg);
16367 %}
16368
16369 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16370 %{
16371 match(Set cr (OverflowSubI op1 op2));
16372
16373 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16374 ins_encode %{
16375 __ cmpl($op1$$Register, $op2$$constant);
16376 %}
16377 ins_pipe(ialu_reg_reg);
16378 %}
16379
16380 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16381 %{
16382 match(Set cr (OverflowSubL op1 op2));
16383
16384 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16385 ins_encode %{
16386 __ cmpq($op1$$Register, $op2$$Register);
16387 %}
16388 ins_pipe(ialu_reg_reg);
16389 %}
16390
16391 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16392 %{
16393 match(Set cr (OverflowSubL op1 op2));
16394
16395 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16396 ins_encode %{
16397 __ cmpq($op1$$Register, $op2$$constant);
16398 %}
16399 ins_pipe(ialu_reg_reg);
16400 %}
16401
16402 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16403 %{
16404 match(Set cr (OverflowSubI zero op2));
16405 effect(DEF cr, USE_KILL op2);
16406
16407 format %{ "negl $op2\t# overflow check int" %}
16408 ins_encode %{
16409 __ negl($op2$$Register);
16410 %}
16411 ins_pipe(ialu_reg_reg);
16412 %}
16413
16414 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16415 %{
16416 match(Set cr (OverflowSubL zero op2));
16417 effect(DEF cr, USE_KILL op2);
16418
16419 format %{ "negq $op2\t# overflow check long" %}
16420 ins_encode %{
16421 __ negq($op2$$Register);
16422 %}
16423 ins_pipe(ialu_reg_reg);
16424 %}
16425
16426 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16427 %{
16428 match(Set cr (OverflowMulI op1 op2));
16429 effect(DEF cr, USE_KILL op1, USE op2);
16430
16431 format %{ "imull $op1, $op2\t# overflow check int" %}
16432 ins_encode %{
16433 __ imull($op1$$Register, $op2$$Register);
16434 %}
16435 ins_pipe(ialu_reg_reg_alu0);
16436 %}
16437
16438 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16439 %{
16440 match(Set cr (OverflowMulI op1 op2));
16441 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16442
16443 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16444 ins_encode %{
16445 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16446 %}
16447 ins_pipe(ialu_reg_reg_alu0);
16448 %}
16449
16450 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16451 %{
16452 match(Set cr (OverflowMulL op1 op2));
16453 effect(DEF cr, USE_KILL op1, USE op2);
16454
16455 format %{ "imulq $op1, $op2\t# overflow check long" %}
16456 ins_encode %{
16457 __ imulq($op1$$Register, $op2$$Register);
16458 %}
16459 ins_pipe(ialu_reg_reg_alu0);
16460 %}
16461
16462 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16463 %{
16464 match(Set cr (OverflowMulL op1 op2));
16465 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16466
16467 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16468 ins_encode %{
16469 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16470 %}
16471 ins_pipe(ialu_reg_reg_alu0);
16472 %}
16473
16474
16475 //----------Control Flow Instructions------------------------------------------
16476 // Signed compare Instructions
16477
16478 // XXX more variants!!
16479 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16480 %{
16481 match(Set cr (CmpI op1 op2));
16482 effect(DEF cr, USE op1, USE op2);
16483
16484 format %{ "cmpl $op1, $op2" %}
16485 ins_encode %{
16486 __ cmpl($op1$$Register, $op2$$Register);
16487 %}
16488 ins_pipe(ialu_cr_reg_reg);
16489 %}
16490
16491 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16492 %{
16493 match(Set cr (CmpI op1 op2));
16494
16495 format %{ "cmpl $op1, $op2" %}
16496 ins_encode %{
16497 __ cmpl($op1$$Register, $op2$$constant);
16498 %}
16499 ins_pipe(ialu_cr_reg_imm);
16500 %}
16501
16502 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16503 %{
16504 match(Set cr (CmpI op1 (LoadI op2)));
16505
16506 ins_cost(500); // XXX
16507 format %{ "cmpl $op1, $op2" %}
16508 ins_encode %{
16509 __ cmpl($op1$$Register, $op2$$Address);
16510 %}
16511 ins_pipe(ialu_cr_reg_mem);
16512 %}
16513
16514 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16515 %{
16516 match(Set cr (CmpI src zero));
16517
16518 format %{ "testl $src, $src" %}
16519 ins_encode %{
16520 __ testl($src$$Register, $src$$Register);
16521 %}
16522 ins_pipe(ialu_cr_reg_imm);
16523 %}
16524
16525 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16526 %{
16527 match(Set cr (CmpI (AndI src con) zero));
16528
16529 format %{ "testl $src, $con" %}
16530 ins_encode %{
16531 __ testl($src$$Register, $con$$constant);
16532 %}
16533 ins_pipe(ialu_cr_reg_imm);
16534 %}
16535
16536 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16537 %{
16538 match(Set cr (CmpI (AndI src1 src2) zero));
16539
16540 format %{ "testl $src1, $src2" %}
16541 ins_encode %{
16542 __ testl($src1$$Register, $src2$$Register);
16543 %}
16544 ins_pipe(ialu_cr_reg_imm);
16545 %}
16546
16547 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16548 %{
16549 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16550
16551 format %{ "testl $src, $mem" %}
16552 ins_encode %{
16553 __ testl($src$$Register, $mem$$Address);
16554 %}
16555 ins_pipe(ialu_cr_reg_mem);
16556 %}
16557
16558 // Unsigned compare Instructions; really, same as signed except they
16559 // produce an rFlagsRegU instead of rFlagsReg.
16560 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16561 %{
16562 match(Set cr (CmpU op1 op2));
16563
16564 format %{ "cmpl $op1, $op2\t# unsigned" %}
16565 ins_encode %{
16566 __ cmpl($op1$$Register, $op2$$Register);
16567 %}
16568 ins_pipe(ialu_cr_reg_reg);
16569 %}
16570
16571 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16572 %{
16573 match(Set cr (CmpU op1 op2));
16574
16575 format %{ "cmpl $op1, $op2\t# unsigned" %}
16576 ins_encode %{
16577 __ cmpl($op1$$Register, $op2$$constant);
16578 %}
16579 ins_pipe(ialu_cr_reg_imm);
16580 %}
16581
16582 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16583 %{
16584 match(Set cr (CmpU op1 (LoadI op2)));
16585
16586 ins_cost(500); // XXX
16587 format %{ "cmpl $op1, $op2\t# unsigned" %}
16588 ins_encode %{
16589 __ cmpl($op1$$Register, $op2$$Address);
16590 %}
16591 ins_pipe(ialu_cr_reg_mem);
16592 %}
16593
16594 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16595 %{
16596 match(Set cr (CmpU src zero));
16597
16598 format %{ "testl $src, $src\t# unsigned" %}
16599 ins_encode %{
16600 __ testl($src$$Register, $src$$Register);
16601 %}
16602 ins_pipe(ialu_cr_reg_imm);
16603 %}
16604
16605 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16606 %{
16607 match(Set cr (CmpP op1 op2));
16608
16609 format %{ "cmpq $op1, $op2\t# ptr" %}
16610 ins_encode %{
16611 __ cmpq($op1$$Register, $op2$$Register);
16612 %}
16613 ins_pipe(ialu_cr_reg_reg);
16614 %}
16615
16616 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16617 %{
16618 match(Set cr (CmpP op1 (LoadP op2)));
16619 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16620
16621 ins_cost(500); // XXX
16622 format %{ "cmpq $op1, $op2\t# ptr" %}
16623 ins_encode %{
16624 __ cmpq($op1$$Register, $op2$$Address);
16625 %}
16626 ins_pipe(ialu_cr_reg_mem);
16627 %}
16628
16629 // XXX this is generalized by compP_rReg_mem???
16630 // Compare raw pointer (used in out-of-heap check).
16631 // Only works because non-oop pointers must be raw pointers
16632 // and raw pointers have no anti-dependencies.
16633 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16634 %{
16635 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16636 n->in(2)->as_Load()->barrier_data() == 0);
16637 match(Set cr (CmpP op1 (LoadP op2)));
16638
16639 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16640 ins_encode %{
16641 __ cmpq($op1$$Register, $op2$$Address);
16642 %}
16643 ins_pipe(ialu_cr_reg_mem);
16644 %}
16645
16646 // This will generate a signed flags result. This should be OK since
16647 // any compare to a zero should be eq/neq.
16648 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16649 %{
16650 match(Set cr (CmpP src zero));
16651
16652 format %{ "testq $src, $src\t# ptr" %}
16653 ins_encode %{
16654 __ testq($src$$Register, $src$$Register);
16655 %}
16656 ins_pipe(ialu_cr_reg_imm);
16657 %}
16658
16659 // This will generate a signed flags result. This should be OK since
16660 // any compare to a zero should be eq/neq.
16661 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16662 %{
16663 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16664 n->in(1)->as_Load()->barrier_data() == 0);
16665 match(Set cr (CmpP (LoadP op) zero));
16666
16667 ins_cost(500); // XXX
16668 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16669 ins_encode %{
16670 __ testq($op$$Address, 0xFFFFFFFF);
16671 %}
16672 ins_pipe(ialu_cr_reg_imm);
16673 %}
16674
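// With compressed oops and a null heap base, r12 (R12_heapbase) holds zero, so a
// loaded pointer can be null-checked by comparing it against r12 instead of an
// immediate.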
16675 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16676 %{
16677 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16678 n->in(1)->as_Load()->barrier_data() == 0);
16679 match(Set cr (CmpP (LoadP mem) zero));
16680
16681 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16682 ins_encode %{
16683 __ cmpq(r12, $mem$$Address);
16684 %}
16685 ins_pipe(ialu_cr_reg_mem);
16686 %}
16687
16688 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16689 %{
16690 match(Set cr (CmpN op1 op2));
16691
16692 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16693 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16694 ins_pipe(ialu_cr_reg_reg);
16695 %}
16696
16697 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16698 %{
16699 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16700 match(Set cr (CmpN src (LoadN mem)));
16701
16702 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16703 ins_encode %{
16704 __ cmpl($src$$Register, $mem$$Address);
16705 %}
16706 ins_pipe(ialu_cr_reg_mem);
16707 %}
16708
16709 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16710 match(Set cr (CmpN op1 op2));
16711
16712 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16713 ins_encode %{
16714 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16715 %}
16716 ins_pipe(ialu_cr_reg_imm);
16717 %}
16718
16719 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16720 %{
16721 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16722 match(Set cr (CmpN src (LoadN mem)));
16723
16724 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16725 ins_encode %{
16726 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16727 %}
16728 ins_pipe(ialu_cr_reg_mem);
16729 %}
16730
16731 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16732 match(Set cr (CmpN op1 op2));
16733
16734 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16735 ins_encode %{
16736 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16737 %}
16738 ins_pipe(ialu_cr_reg_imm);
16739 %}
16740
16741 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16742 %{
16743 predicate(!UseCompactObjectHeaders);
16744 match(Set cr (CmpN src (LoadNKlass mem)));
16745
16746 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16747 ins_encode %{
16748 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16749 %}
16750 ins_pipe(ialu_cr_reg_mem);
16751 %}
16752
16753 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16754 match(Set cr (CmpN src zero));
16755
16756 format %{ "testl $src, $src\t# compressed ptr" %}
16757 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16758 ins_pipe(ialu_cr_reg_imm);
16759 %}
16760
16761 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16762 %{
16763 predicate(CompressedOops::base() != nullptr &&
16764 n->in(1)->as_Load()->barrier_data() == 0);
16765 match(Set cr (CmpN (LoadN mem) zero));
16766
16767 ins_cost(500); // XXX
16768 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16769 ins_encode %{
16770 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16771 %}
16772 ins_pipe(ialu_cr_reg_mem);
16773 %}
16774
16775 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16776 %{
16777 predicate(CompressedOops::base() == nullptr &&
16778 n->in(1)->as_Load()->barrier_data() == 0);
16779 match(Set cr (CmpN (LoadN mem) zero));
16780
16781 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16782 ins_encode %{
16783 __ cmpl(r12, $mem$$Address);
16784 %}
16785 ins_pipe(ialu_cr_reg_mem);
16786 %}
16787
16788 // Yanked all unsigned pointer compare operations.
16789 // Pointer compares are done with CmpP which is already unsigned.
16790
16791 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16792 %{
16793 match(Set cr (CmpL op1 op2));
16794
16795 format %{ "cmpq $op1, $op2" %}
16796 ins_encode %{
16797 __ cmpq($op1$$Register, $op2$$Register);
16798 %}
16799 ins_pipe(ialu_cr_reg_reg);
16800 %}
16801
16802 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16803 %{
16804 match(Set cr (CmpL op1 op2));
16805
16806 format %{ "cmpq $op1, $op2" %}
16807 ins_encode %{
16808 __ cmpq($op1$$Register, $op2$$constant);
16809 %}
16810 ins_pipe(ialu_cr_reg_imm);
16811 %}
16812
16813 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16814 %{
16815 match(Set cr (CmpL op1 (LoadL op2)));
16816
16817 format %{ "cmpq $op1, $op2" %}
16818 ins_encode %{
16819 __ cmpq($op1$$Register, $op2$$Address);
16820 %}
16821 ins_pipe(ialu_cr_reg_mem);
16822 %}
16823
16824 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16825 %{
16826 match(Set cr (CmpL src zero));
16827
16828 format %{ "testq $src, $src" %}
16829 ins_encode %{
16830 __ testq($src$$Register, $src$$Register);
16831 %}
16832 ins_pipe(ialu_cr_reg_imm);
16833 %}
16834
16835 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16836 %{
16837 match(Set cr (CmpL (AndL src con) zero));
16838
16839 format %{ "testq $src, $con\t# long" %}
16840 ins_encode %{
16841 __ testq($src$$Register, $con$$constant);
16842 %}
16843 ins_pipe(ialu_cr_reg_imm);
16844 %}
16845
16846 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16847 %{
16848 match(Set cr (CmpL (AndL src1 src2) zero));
16849
16850 format %{ "testq $src1, $src2\t# long" %}
16851 ins_encode %{
16852 __ testq($src1$$Register, $src2$$Register);
16853 %}
16854 ins_pipe(ialu_cr_reg_imm);
16855 %}
16856
16857 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16858 %{
16859 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16860
16861 format %{ "testq $src, $mem" %}
16862 ins_encode %{
16863 __ testq($src$$Register, $mem$$Address);
16864 %}
16865 ins_pipe(ialu_cr_reg_mem);
16866 %}
16867
16868 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16869 %{
16870 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16871
16872 format %{ "testq $src, $mem" %}
16873 ins_encode %{
16874 __ testq($src$$Register, $mem$$Address);
16875 %}
16876 ins_pipe(ialu_cr_reg_mem);
16877 %}
16878
16879 // Manifest a CmpU result in an integer register. Very painful.
16880 // This is the test to avoid.
16881 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16882 %{
16883 match(Set dst (CmpU3 src1 src2));
16884 effect(KILL flags);
16885
16886 ins_cost(275); // XXX
format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
16888 "movl $dst, -1\n\t"
16889 "jb,u done\n\t"
"setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
16891 "done:" %}
16892 ins_encode %{
16893 Label done;
16894 __ cmpl($src1$$Register, $src2$$Register);
16895 __ movl($dst$$Register, -1);
16896 __ jccb(Assembler::below, done);
16897 __ setcc(Assembler::notZero, $dst$$Register);
16898 __ bind(done);
16899 %}
16900 ins_pipe(pipe_slow);
16901 %}
16902
16903 // Manifest a CmpL result in an integer register. Very painful.
16904 // This is the test to avoid.
16905 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16906 %{
16907 match(Set dst (CmpL3 src1 src2));
16908 effect(KILL flags);
16909
16910 ins_cost(275); // XXX
16911 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16912 "movl $dst, -1\n\t"
16913 "jl,s done\n\t"
"setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
16915 "done:" %}
16916 ins_encode %{
16917 Label done;
16918 __ cmpq($src1$$Register, $src2$$Register);
16919 __ movl($dst$$Register, -1);
16920 __ jccb(Assembler::less, done);
16921 __ setcc(Assembler::notZero, $dst$$Register);
16922 __ bind(done);
16923 %}
16924 ins_pipe(pipe_slow);
16925 %}
16926
16927 // Manifest a CmpUL result in an integer register. Very painful.
16928 // This is the test to avoid.
16929 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16930 %{
16931 match(Set dst (CmpUL3 src1 src2));
16932 effect(KILL flags);
16933
16934 ins_cost(275); // XXX
format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
16936 "movl $dst, -1\n\t"
16937 "jb,u done\n\t"
"setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
16939 "done:" %}
16940 ins_encode %{
16941 Label done;
16942 __ cmpq($src1$$Register, $src2$$Register);
16943 __ movl($dst$$Register, -1);
16944 __ jccb(Assembler::below, done);
16945 __ setcc(Assembler::notZero, $dst$$Register);
16946 __ bind(done);
16947 %}
16948 ins_pipe(pipe_slow);
16949 %}
16950
16951 // Unsigned long compare Instructions; really, same as signed long except they
16952 // produce an rFlagsRegU instead of rFlagsReg.
16953 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16954 %{
16955 match(Set cr (CmpUL op1 op2));
16956
16957 format %{ "cmpq $op1, $op2\t# unsigned" %}
16958 ins_encode %{
16959 __ cmpq($op1$$Register, $op2$$Register);
16960 %}
16961 ins_pipe(ialu_cr_reg_reg);
16962 %}
16963
16964 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16965 %{
16966 match(Set cr (CmpUL op1 op2));
16967
16968 format %{ "cmpq $op1, $op2\t# unsigned" %}
16969 ins_encode %{
16970 __ cmpq($op1$$Register, $op2$$constant);
16971 %}
16972 ins_pipe(ialu_cr_reg_imm);
16973 %}
16974
16975 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16976 %{
16977 match(Set cr (CmpUL op1 (LoadL op2)));
16978
16979 format %{ "cmpq $op1, $op2\t# unsigned" %}
16980 ins_encode %{
16981 __ cmpq($op1$$Register, $op2$$Address);
16982 %}
16983 ins_pipe(ialu_cr_reg_mem);
16984 %}
16985
16986 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16987 %{
16988 match(Set cr (CmpUL src zero));
16989
16990 format %{ "testq $src, $src\t# unsigned" %}
16991 ins_encode %{
16992 __ testq($src$$Register, $src$$Register);
16993 %}
16994 ins_pipe(ialu_cr_reg_imm);
16995 %}
16996
16997 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16998 %{
16999 match(Set cr (CmpI (LoadB mem) imm));
17000
17001 ins_cost(125);
17002 format %{ "cmpb $mem, $imm" %}
17003 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
17004 ins_pipe(ialu_cr_reg_mem);
17005 %}
17006
17007 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
17008 %{
17009 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
17010
17011 ins_cost(125);
17012 format %{ "testb $mem, $imm\t# ubyte" %}
17013 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
17014 ins_pipe(ialu_cr_reg_mem);
17015 %}
17016
17017 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
17018 %{
17019 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
17020
17021 ins_cost(125);
17022 format %{ "testb $mem, $imm\t# byte" %}
17023 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
17024 ins_pipe(ialu_cr_reg_mem);
17025 %}
17026
17027 //----------Max and Min--------------------------------------------------------
17028 // Min Instructions
17029
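// MinI/MaxI are not matched directly; they expand into a compare followed by a
// conditional move. The _ndd variants use the APX new-data-destination encoding,
// writing a separate destination so neither source is clobbered.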
17030 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
17031 %{
17032 predicate(!UseAPX);
17033 effect(USE_DEF dst, USE src, USE cr);
17034
17035 format %{ "cmovlgt $dst, $src\t# min" %}
17036 ins_encode %{
17037 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
17038 %}
17039 ins_pipe(pipe_cmov_reg);
17040 %}
17041
17042 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17043 %{
17044 predicate(UseAPX);
17045 effect(DEF dst, USE src1, USE src2, USE cr);
17046
17047 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
17048 ins_encode %{
17049 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
17050 %}
17051 ins_pipe(pipe_cmov_reg);
17052 %}
17053
17054 instruct minI_rReg(rRegI dst, rRegI src)
17055 %{
17056 predicate(!UseAPX);
17057 match(Set dst (MinI dst src));
17058
17059 ins_cost(200);
17060 expand %{
17061 rFlagsReg cr;
17062 compI_rReg(cr, dst, src);
17063 cmovI_reg_g(dst, src, cr);
17064 %}
17065 %}
17066
17067 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17068 %{
17069 predicate(UseAPX);
17070 match(Set dst (MinI src1 src2));
17071 effect(DEF dst, USE src1, USE src2);
17072 flag(PD::Flag_ndd_demotable_opr1);
17073
17074 ins_cost(200);
17075 expand %{
17076 rFlagsReg cr;
17077 compI_rReg(cr, src1, src2);
17078 cmovI_reg_g_ndd(dst, src1, src2, cr);
17079 %}
17080 %}
17081
17082 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
17083 %{
17084 predicate(!UseAPX);
17085 effect(USE_DEF dst, USE src, USE cr);
17086
17087 format %{ "cmovllt $dst, $src\t# max" %}
17088 ins_encode %{
17089 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
17090 %}
17091 ins_pipe(pipe_cmov_reg);
17092 %}
17093
17094 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17095 %{
17096 predicate(UseAPX);
17097 effect(DEF dst, USE src1, USE src2, USE cr);
17098
17099 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
17100 ins_encode %{
17101 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
17102 %}
17103 ins_pipe(pipe_cmov_reg);
17104 %}
17105
17106 instruct maxI_rReg(rRegI dst, rRegI src)
17107 %{
17108 predicate(!UseAPX);
17109 match(Set dst (MaxI dst src));
17110
17111 ins_cost(200);
17112 expand %{
17113 rFlagsReg cr;
17114 compI_rReg(cr, dst, src);
17115 cmovI_reg_l(dst, src, cr);
17116 %}
17117 %}
17118
17119 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17120 %{
17121 predicate(UseAPX);
17122 match(Set dst (MaxI src1 src2));
17123 effect(DEF dst, USE src1, USE src2);
17124 flag(PD::Flag_ndd_demotable_opr1);
17125
17126 ins_cost(200);
17127 expand %{
17128 rFlagsReg cr;
17129 compI_rReg(cr, src1, src2);
17130 cmovI_reg_l_ndd(dst, src1, src2, cr);
17131 %}
17132 %}
17133
17134 // ============================================================================
17135 // Branch Instructions
17136
17137 // Jump Direct - Label defines a relative address from JMP+1
17138 instruct jmpDir(label labl)
17139 %{
17140 match(Goto);
17141 effect(USE labl);
17142
17143 ins_cost(300);
17144 format %{ "jmp $labl" %}
17145 size(5);
17146 ins_encode %{
17147 Label* L = $labl$$label;
17148 __ jmp(*L, false); // Always long jump
17149 %}
17150 ins_pipe(pipe_jmp);
17151 %}
17152
17153 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17154 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17155 %{
17156 match(If cop cr);
17157 effect(USE labl);
17158
17159 ins_cost(300);
17160 format %{ "j$cop $labl" %}
17161 size(6);
17162 ins_encode %{
17163 Label* L = $labl$$label;
17164 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17165 %}
17166 ins_pipe(pipe_jcc);
17167 %}
17168
17169 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17170 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17171 %{
17172 match(CountedLoopEnd cop cr);
17173 effect(USE labl);
17174
17175 ins_cost(300);
17176 format %{ "j$cop $labl\t# loop end" %}
17177 size(6);
17178 ins_encode %{
17179 Label* L = $labl$$label;
17180 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17181 %}
17182 ins_pipe(pipe_jcc);
17183 %}
17184
17185 // Jump Direct Conditional - using unsigned comparison
17186 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17187 match(If cop cmp);
17188 effect(USE labl);
17189
17190 ins_cost(300);
17191 format %{ "j$cop,u $labl" %}
17192 size(6);
17193 ins_encode %{
17194 Label* L = $labl$$label;
17195 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17196 %}
17197 ins_pipe(pipe_jcc);
17198 %}
17199
17200 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17201 match(If cop cmp);
17202 effect(USE labl);
17203
17204 ins_cost(200);
17205 format %{ "j$cop,u $labl" %}
17206 size(6);
17207 ins_encode %{
17208 Label* L = $labl$$label;
17209 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17210 %}
17211 ins_pipe(pipe_jcc);
17212 %}
17213
17214 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17215 match(If cop cmp);
17216 effect(USE labl);
17217
17218 ins_cost(200);
17219 format %{ $$template
17220 if ($cop$$cmpcode == Assembler::notEqual) {
17221 $$emit$$"jp,u $labl\n\t"
17222 $$emit$$"j$cop,u $labl"
17223 } else {
17224 $$emit$$"jp,u done\n\t"
17225 $$emit$$"j$cop,u $labl\n\t"
17226 $$emit$$"done:"
17227 }
17228 %}
17229 ins_encode %{
17230 Label* l = $labl$$label;
17231 if ($cop$$cmpcode == Assembler::notEqual) {
17232 __ jcc(Assembler::parity, *l, false);
17233 __ jcc(Assembler::notEqual, *l, false);
17234 } else if ($cop$$cmpcode == Assembler::equal) {
17235 Label done;
17236 __ jccb(Assembler::parity, done);
17237 __ jcc(Assembler::equal, *l, false);
17238 __ bind(done);
17239 } else {
17240 ShouldNotReachHere();
17241 }
17242 %}
17243 ins_pipe(pipe_jcc);
17244 %}
17245
17246 // Jump Direct Conditional - using signed and unsigned comparison
17247 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17248 match(If cop cmp);
17249 effect(USE labl);
17250
17251 ins_cost(200);
17252 format %{ "j$cop,su $labl" %}
17253 size(6);
17254 ins_encode %{
17255 Label* L = $labl$$label;
17256 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17257 %}
17258 ins_pipe(pipe_jcc);
17259 %}
17260
17261 // ============================================================================
// The second, slow half of a subtype check. Scan the sub-klass's secondary
// super-klass array for an instance of the super-klass. Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()). Return NZ for a miss or zero for a hit. The
// encoding ALSO sets flags.
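//
// A sketch of the semantics in C-flavored pseudocode (the helper names here
// are illustrative only, not the exact VM API):
//
//   for (int i = 0; i < secondary_supers_length(sub); i++) {
//     if (secondary_super_at(sub, i) == super) {
//       set_secondary_super_cache(sub, super);  // hidden cache updated on a hit
//       return 0;                               // hit: result == 0, Z flag set
//     }
//   }
//   return 1;                                   // miss: result != 0, NZ flag set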
17267
17268 instruct partialSubtypeCheck(rdi_RegP result,
17269 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17270 rFlagsReg cr)
17271 %{
17272 match(Set result (PartialSubtypeCheck sub super));
17273 predicate(!UseSecondarySupersTable);
17274 effect(KILL rcx, KILL cr);
17275
17276 ins_cost(1100); // slightly larger than the next version
17277 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17278 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17279 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17280 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17281 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
17282 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17283 "xorq $result, $result\t\t Hit: rdi zero\n\t"
17284 "miss:\t" %}
17285
17286 ins_encode %{
17287 Label miss;
17288 // NB: Callers may assume that, when $result is a valid register,
17289 // check_klass_subtype_slow_path_linear sets it to a nonzero
17290 // value.
17291 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17292 $rcx$$Register, $result$$Register,
17293 nullptr, &miss,
17294 /*set_cond_codes:*/ true);
17295 __ xorptr($result$$Register, $result$$Register);
17296 __ bind(miss);
17297 %}
17298
17299 ins_pipe(pipe_slow);
17300 %}
17301
17302 // ============================================================================
17303 // Two versions of hashtable-based partialSubtypeCheck, both used when
17304 // we need to search for a super class in the secondary supers array.
17305 // The first is used when we don't know _a priori_ the class being
17306 // searched for. The second, far more common, is used when we do know:
// it covers instanceof, checkcast, and any other case where C2 can
// determine the super class by constant propagation.
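//
// As a hypothetical Java-level illustration of when each form applies:
//
//   obj instanceof Runnable    // super class is a compile-time constant,
//                              // so partialSubtypeCheckConstSuper is used
//   someClass.isInstance(obj)  // super class is only known at runtime,
//                              // so partialSubtypeCheckVarSuper is used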
17309
17310 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17311 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17312 rFlagsReg cr)
17313 %{
17314 match(Set result (PartialSubtypeCheck sub super));
17315 predicate(UseSecondarySupersTable);
17316 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17317
17318 ins_cost(1000);
17319 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17320
17321 ins_encode %{
17322 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17323 $temp3$$Register, $temp4$$Register, $result$$Register);
17324 %}
17325
17326 ins_pipe(pipe_slow);
17327 %}
17328
17329 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17330 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17331 rFlagsReg cr)
17332 %{
17333 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17334 predicate(UseSecondarySupersTable);
17335 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17336
17337 ins_cost(700); // smaller than the next version
17338 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17339
17340 ins_encode %{
17341 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17342 if (InlineSecondarySupersTest) {
17343 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17344 $temp3$$Register, $temp4$$Register, $result$$Register,
17345 super_klass_slot);
17346 } else {
17347 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17348 }
17349 %}
17350
17351 ins_pipe(pipe_slow);
17352 %}
17353
17354 // ============================================================================
17355 // Branch Instructions -- short offset versions
17356 //
17357 // These instructions are used to replace jumps of a long offset (the default
17358 // match) with jumps of a shorter offset. These instructions are all tagged
17359 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17360 // match rules in general matching. Instead, the ADLC generates a conversion
17361 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler determines whether a
// short branch can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of this file.
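//
// The payoff is code size: the long forms above encode the jump target as a
// 32-bit displacement, while the short forms below use an 8-bit displacement
// (compare the size() attributes of the matching rules):
//
//   jmp   rel32   5 bytes        jmp,s   (jmpb) rel8   2 bytes
//   j$cop rel32   6 bytes        j$cop,s (jccb) rel8   2 bytes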
17365
17366 // Jump Direct - Label defines a relative address from JMP+1
17367 instruct jmpDir_short(label labl) %{
17368 match(Goto);
17369 effect(USE labl);
17370
17371 ins_cost(300);
17372 format %{ "jmp,s $labl" %}
17373 size(2);
17374 ins_encode %{
17375 Label* L = $labl$$label;
17376 __ jmpb(*L);
17377 %}
17378 ins_pipe(pipe_jmp);
17379 ins_short_branch(1);
17380 %}
17381
17382 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17383 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17384 match(If cop cr);
17385 effect(USE labl);
17386
17387 ins_cost(300);
17388 format %{ "j$cop,s $labl" %}
17389 size(2);
17390 ins_encode %{
17391 Label* L = $labl$$label;
17392 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17393 %}
17394 ins_pipe(pipe_jcc);
17395 ins_short_branch(1);
17396 %}
17397
17398 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17399 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17400 match(CountedLoopEnd cop cr);
17401 effect(USE labl);
17402
17403 ins_cost(300);
17404 format %{ "j$cop,s $labl\t# loop end" %}
17405 size(2);
17406 ins_encode %{
17407 Label* L = $labl$$label;
17408 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17409 %}
17410 ins_pipe(pipe_jcc);
17411 ins_short_branch(1);
17412 %}
17413
17414 // Jump Direct Conditional - using unsigned comparison
17415 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17416 match(If cop cmp);
17417 effect(USE labl);
17418
17419 ins_cost(300);
17420 format %{ "j$cop,us $labl" %}
17421 size(2);
17422 ins_encode %{
17423 Label* L = $labl$$label;
17424 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17425 %}
17426 ins_pipe(pipe_jcc);
17427 ins_short_branch(1);
17428 %}
17429
17430 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17431 match(If cop cmp);
17432 effect(USE labl);
17433
17434 ins_cost(300);
17435 format %{ "j$cop,us $labl" %}
17436 size(2);
17437 ins_encode %{
17438 Label* L = $labl$$label;
17439 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17440 %}
17441 ins_pipe(pipe_jcc);
17442 ins_short_branch(1);
17443 %}
17444
17445 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17446 match(If cop cmp);
17447 effect(USE labl);
17448
17449 ins_cost(300);
17450 format %{ $$template
17451 if ($cop$$cmpcode == Assembler::notEqual) {
17452 $$emit$$"jp,u,s $labl\n\t"
17453 $$emit$$"j$cop,u,s $labl"
17454 } else {
17455 $$emit$$"jp,u,s done\n\t"
17456 $$emit$$"j$cop,u,s $labl\n\t"
17457 $$emit$$"done:"
17458 }
17459 %}
17460 size(4);
17461 ins_encode %{
17462 Label* l = $labl$$label;
17463 if ($cop$$cmpcode == Assembler::notEqual) {
17464 __ jccb(Assembler::parity, *l);
17465 __ jccb(Assembler::notEqual, *l);
17466 } else if ($cop$$cmpcode == Assembler::equal) {
17467 Label done;
17468 __ jccb(Assembler::parity, done);
17469 __ jccb(Assembler::equal, *l);
17470 __ bind(done);
17471 } else {
17472 ShouldNotReachHere();
17473 }
17474 %}
17475 ins_pipe(pipe_jcc);
17476 ins_short_branch(1);
17477 %}
17478
17479 // Jump Direct Conditional - using signed and unsigned comparison
17480 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17481 match(If cop cmp);
17482 effect(USE labl);
17483
17484 ins_cost(300);
17485 format %{ "j$cop,sus $labl" %}
17486 size(2);
17487 ins_encode %{
17488 Label* L = $labl$$label;
17489 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17490 %}
17491 ins_pipe(pipe_jcc);
17492 ins_short_branch(1);
17493 %}
17494
17495 // ============================================================================
17496 // inlined locking and unlocking
17497
17498 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17499 match(Set cr (FastLock object box));
17500 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17501 ins_cost(300);
17502 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17503 ins_encode %{
17504 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17505 %}
17506 ins_pipe(pipe_slow);
17507 %}
17508
17509 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17510 match(Set cr (FastUnlock object rax_reg));
17511 effect(TEMP tmp, USE_KILL rax_reg);
17512 ins_cost(300);
17513 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17514 ins_encode %{
17515 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17516 %}
17517 ins_pipe(pipe_slow);
17518 %}
17519
17520
17521 // ============================================================================
17522 // Safepoint Instructions
17523 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17524 %{
17525 match(SafePoint poll);
17526 effect(KILL cr, USE poll);
17527
17528 format %{ "testl rax, [$poll]\t"
17529 "# Safepoint: poll for GC" %}
17530 ins_cost(125);
17531 ins_encode %{
17532 __ relocate(relocInfo::poll_type);
17533 address pre_pc = __ pc();
17534 __ testl(rax, Address($poll$$Register, 0));
17535 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17536 %}
17537 ins_pipe(ialu_reg_mem);
17538 %}
17539
17540 instruct mask_all_evexL(kReg dst, rRegL src) %{
17541 match(Set dst (MaskAll src));
17542 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17543 ins_encode %{
17544 int mask_len = Matcher::vector_length(this);
17545 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17546 %}
17547 ins_pipe( pipe_slow );
17548 %}
17549
17550 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17551 predicate(Matcher::vector_length(n) > 32);
17552 match(Set dst (MaskAll src));
17553 effect(TEMP tmp);
17554 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17555 ins_encode %{
17556 int mask_len = Matcher::vector_length(this);
17557 __ movslq($tmp$$Register, $src$$Register);
17558 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17559 %}
17560 ins_pipe( pipe_slow );
17561 %}
17562
17563 // ============================================================================
17564 // Procedure Call/Return Instructions
17565 // Call Java Static Instruction
17566 // Note: If this code changes, the corresponding ret_addr_offset() and
17567 // compute_padding() functions will have to be adjusted.
17568 instruct CallStaticJavaDirect(method meth) %{
17569 match(CallStaticJava);
17570 effect(USE meth);
17571
17572 ins_cost(300);
17573 format %{ "call,static " %}
17574 opcode(0xE8); /* E8 cd */
17575 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17576 ins_pipe(pipe_slow);
17577 ins_alignment(4);
17578 %}
17579
17580 // Call Java Dynamic Instruction
17581 // Note: If this code changes, the corresponding ret_addr_offset() and
17582 // compute_padding() functions will have to be adjusted.
17583 instruct CallDynamicJavaDirect(method meth)
17584 %{
17585 match(CallDynamicJava);
17586 effect(USE meth);
17587
17588 ins_cost(300);
17589 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17590 "call,dynamic " %}
17591 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17592 ins_pipe(pipe_slow);
17593 ins_alignment(4);
17594 %}
17595
17596 // Call Runtime Instruction
17597 instruct CallRuntimeDirect(method meth)
17598 %{
17599 match(CallRuntime);
17600 effect(USE meth);
17601
17602 ins_cost(300);
17603 format %{ "call,runtime " %}
17604 ins_encode(clear_avx, Java_To_Runtime(meth));
17605 ins_pipe(pipe_slow);
17606 %}
17607
17608 // Call runtime without safepoint
17609 instruct CallLeafDirect(method meth)
17610 %{
17611 match(CallLeaf);
17612 effect(USE meth);
17613
17614 ins_cost(300);
17615 format %{ "call_leaf,runtime " %}
17616 ins_encode(clear_avx, Java_To_Runtime(meth));
17617 ins_pipe(pipe_slow);
17618 %}
17619
17620 // Call runtime without safepoint and with vector arguments
17621 instruct CallLeafDirectVector(method meth)
17622 %{
17623 match(CallLeafVector);
17624 effect(USE meth);
17625
17626 ins_cost(300);
17627 format %{ "call_leaf,vector " %}
17628 ins_encode(Java_To_Runtime(meth));
17629 ins_pipe(pipe_slow);
17630 %}
17631
17632 // Call runtime without safepoint
17633 // entry point is null, target holds the address to call
17634 instruct CallLeafNoFPInDirect(rRegP target)
17635 %{
17636 predicate(n->as_Call()->entry_point() == nullptr);
17637 match(CallLeafNoFP target);
17638
17639 ins_cost(300);
17640 format %{ "call_leaf_nofp,runtime indirect " %}
17641 ins_encode %{
17642 __ call($target$$Register);
17643 %}
17644
17645 ins_pipe(pipe_slow);
17646 %}
17647
17648 // Call runtime without safepoint
17649 instruct CallLeafNoFPDirect(method meth)
17650 %{
17651 predicate(n->as_Call()->entry_point() != nullptr);
17652 match(CallLeafNoFP);
17653 effect(USE meth);
17654
17655 ins_cost(300);
17656 format %{ "call_leaf_nofp,runtime " %}
17657 ins_encode(clear_avx, Java_To_Runtime(meth));
17658 ins_pipe(pipe_slow);
17659 %}
17660
17661 // Return Instruction
17662 // Remove the return address & jump to it.
17663 // Notice: We always emit a nop after a ret to make sure there is room
17664 // for safepoint patching
17665 instruct Ret()
17666 %{
17667 match(Return);
17668
17669 format %{ "ret" %}
17670 ins_encode %{
17671 __ ret(0);
17672 %}
17673 ins_pipe(pipe_jmp);
17674 %}
17675
17676 // Tail Call; Jump from runtime stub to Java code.
17677 // Also known as an 'interprocedural jump'.
17678 // Target of jump will eventually return to caller.
17679 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, and it has reset rbp to the caller's state.
17682 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17683 %{
17684 match(TailCall jump_target method_ptr);
17685
17686 ins_cost(300);
17687 format %{ "jmp $jump_target\t# rbx holds method" %}
17688 ins_encode %{
17689 __ jmp($jump_target$$Register);
17690 %}
17691 ins_pipe(pipe_jmp);
17692 %}
17693
17694 // Tail Jump; remove the return address; jump to target.
17695 // TailCall above leaves the return address around.
17696 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17697 %{
17698 match(TailJump jump_target ex_oop);
17699
17700 ins_cost(300);
17701 format %{ "popq rdx\t# pop return address\n\t"
17702 "jmp $jump_target" %}
17703 ins_encode %{
17704 __ popq(as_Register(RDX_enc));
17705 __ jmp($jump_target$$Register);
17706 %}
17707 ins_pipe(pipe_jmp);
17708 %}
17709
17710 // Forward exception.
17711 instruct ForwardExceptionjmp()
17712 %{
17713 match(ForwardException);
17714
17715 format %{ "jmp forward_exception_stub" %}
17716 ins_encode %{
17717 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17718 %}
17719 ins_pipe(pipe_jmp);
17720 %}
17721
17722 // Create exception oop: created by stack-crawling runtime code.
17723 // Created exception is now available to this handler, and is setup
17724 // just prior to jumping to this handler. No code emitted.
17725 instruct CreateException(rax_RegP ex_oop)
17726 %{
17727 match(Set ex_oop (CreateEx));
17728
17729 size(0);
17730 // use the following format syntax
17731 format %{ "# exception oop is in rax; no code emitted" %}
17732 ins_encode();
17733 ins_pipe(empty);
17734 %}
17735
17736 // Rethrow exception:
17737 // The exception oop will come in the first argument position.
17738 // Then JUMP (not call) to the rethrow stub code.
17739 instruct RethrowException()
17740 %{
17741 match(Rethrow);
17742
17743 // use the following format syntax
17744 format %{ "jmp rethrow_stub" %}
17745 ins_encode %{
17746 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17747 %}
17748 ins_pipe(pipe_jmp);
17749 %}
17750
17751 // ============================================================================
17752 // This name is KNOWN by the ADLC and cannot be changed.
17753 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17754 // for this guy.
17755 instruct tlsLoadP(r15_RegP dst) %{
17756 match(Set dst (ThreadLocal));
17757 effect(DEF dst);
17758
17759 size(0);
17760 format %{ "# TLS is in R15" %}
17761 ins_encode( /*empty encoding*/ );
17762 ins_pipe(ialu_reg_reg);
17763 %}
17764
17765 instruct addF_reg(regF dst, regF src) %{
17766 predicate(UseAVX == 0);
17767 match(Set dst (AddF dst src));
17768
17769 format %{ "addss $dst, $src" %}
17770 ins_cost(150);
17771 ins_encode %{
17772 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17773 %}
17774 ins_pipe(pipe_slow);
17775 %}
17776
17777 instruct addF_mem(regF dst, memory src) %{
17778 predicate(UseAVX == 0);
17779 match(Set dst (AddF dst (LoadF src)));
17780
17781 format %{ "addss $dst, $src" %}
17782 ins_cost(150);
17783 ins_encode %{
17784 __ addss($dst$$XMMRegister, $src$$Address);
17785 %}
17786 ins_pipe(pipe_slow);
17787 %}
17788
17789 instruct addF_imm(regF dst, immF con) %{
17790 predicate(UseAVX == 0);
17791 match(Set dst (AddF dst con));
17792 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17793 ins_cost(150);
17794 ins_encode %{
17795 __ addss($dst$$XMMRegister, $constantaddress($con));
17796 %}
17797 ins_pipe(pipe_slow);
17798 %}
17799
17800 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17801 predicate(UseAVX > 0);
17802 match(Set dst (AddF src1 src2));
17803
17804 format %{ "vaddss $dst, $src1, $src2" %}
17805 ins_cost(150);
17806 ins_encode %{
17807 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17808 %}
17809 ins_pipe(pipe_slow);
17810 %}
17811
17812 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17813 predicate(UseAVX > 0);
17814 match(Set dst (AddF src1 (LoadF src2)));
17815
17816 format %{ "vaddss $dst, $src1, $src2" %}
17817 ins_cost(150);
17818 ins_encode %{
17819 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17820 %}
17821 ins_pipe(pipe_slow);
17822 %}
17823
17824 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17825 predicate(UseAVX > 0);
17826 match(Set dst (AddF src con));
17827
17828 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17829 ins_cost(150);
17830 ins_encode %{
17831 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17832 %}
17833 ins_pipe(pipe_slow);
17834 %}
17835
17836 instruct addD_reg(regD dst, regD src) %{
17837 predicate(UseAVX == 0);
17838 match(Set dst (AddD dst src));
17839
17840 format %{ "addsd $dst, $src" %}
17841 ins_cost(150);
17842 ins_encode %{
17843 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17844 %}
17845 ins_pipe(pipe_slow);
17846 %}
17847
17848 instruct addD_mem(regD dst, memory src) %{
17849 predicate(UseAVX == 0);
17850 match(Set dst (AddD dst (LoadD src)));
17851
17852 format %{ "addsd $dst, $src" %}
17853 ins_cost(150);
17854 ins_encode %{
17855 __ addsd($dst$$XMMRegister, $src$$Address);
17856 %}
17857 ins_pipe(pipe_slow);
17858 %}
17859
17860 instruct addD_imm(regD dst, immD con) %{
17861 predicate(UseAVX == 0);
17862 match(Set dst (AddD dst con));
17863 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17864 ins_cost(150);
17865 ins_encode %{
17866 __ addsd($dst$$XMMRegister, $constantaddress($con));
17867 %}
17868 ins_pipe(pipe_slow);
17869 %}
17870
17871 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17872 predicate(UseAVX > 0);
17873 match(Set dst (AddD src1 src2));
17874
17875 format %{ "vaddsd $dst, $src1, $src2" %}
17876 ins_cost(150);
17877 ins_encode %{
17878 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17879 %}
17880 ins_pipe(pipe_slow);
17881 %}
17882
17883 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17884 predicate(UseAVX > 0);
17885 match(Set dst (AddD src1 (LoadD src2)));
17886
17887 format %{ "vaddsd $dst, $src1, $src2" %}
17888 ins_cost(150);
17889 ins_encode %{
17890 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17891 %}
17892 ins_pipe(pipe_slow);
17893 %}
17894
17895 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17896 predicate(UseAVX > 0);
17897 match(Set dst (AddD src con));
17898
17899 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17900 ins_cost(150);
17901 ins_encode %{
17902 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17903 %}
17904 ins_pipe(pipe_slow);
17905 %}
17906
17907 instruct subF_reg(regF dst, regF src) %{
17908 predicate(UseAVX == 0);
17909 match(Set dst (SubF dst src));
17910
17911 format %{ "subss $dst, $src" %}
17912 ins_cost(150);
17913 ins_encode %{
17914 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17915 %}
17916 ins_pipe(pipe_slow);
17917 %}
17918
17919 instruct subF_mem(regF dst, memory src) %{
17920 predicate(UseAVX == 0);
17921 match(Set dst (SubF dst (LoadF src)));
17922
17923 format %{ "subss $dst, $src" %}
17924 ins_cost(150);
17925 ins_encode %{
17926 __ subss($dst$$XMMRegister, $src$$Address);
17927 %}
17928 ins_pipe(pipe_slow);
17929 %}
17930
17931 instruct subF_imm(regF dst, immF con) %{
17932 predicate(UseAVX == 0);
17933 match(Set dst (SubF dst con));
17934 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17935 ins_cost(150);
17936 ins_encode %{
17937 __ subss($dst$$XMMRegister, $constantaddress($con));
17938 %}
17939 ins_pipe(pipe_slow);
17940 %}
17941
17942 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17943 predicate(UseAVX > 0);
17944 match(Set dst (SubF src1 src2));
17945
17946 format %{ "vsubss $dst, $src1, $src2" %}
17947 ins_cost(150);
17948 ins_encode %{
17949 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17950 %}
17951 ins_pipe(pipe_slow);
17952 %}
17953
17954 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17955 predicate(UseAVX > 0);
17956 match(Set dst (SubF src1 (LoadF src2)));
17957
17958 format %{ "vsubss $dst, $src1, $src2" %}
17959 ins_cost(150);
17960 ins_encode %{
17961 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17962 %}
17963 ins_pipe(pipe_slow);
17964 %}
17965
17966 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17967 predicate(UseAVX > 0);
17968 match(Set dst (SubF src con));
17969
17970 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17971 ins_cost(150);
17972 ins_encode %{
17973 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17974 %}
17975 ins_pipe(pipe_slow);
17976 %}
17977
17978 instruct subD_reg(regD dst, regD src) %{
17979 predicate(UseAVX == 0);
17980 match(Set dst (SubD dst src));
17981
17982 format %{ "subsd $dst, $src" %}
17983 ins_cost(150);
17984 ins_encode %{
17985 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17986 %}
17987 ins_pipe(pipe_slow);
17988 %}
17989
17990 instruct subD_mem(regD dst, memory src) %{
17991 predicate(UseAVX == 0);
17992 match(Set dst (SubD dst (LoadD src)));
17993
17994 format %{ "subsd $dst, $src" %}
17995 ins_cost(150);
17996 ins_encode %{
17997 __ subsd($dst$$XMMRegister, $src$$Address);
17998 %}
17999 ins_pipe(pipe_slow);
18000 %}
18001
18002 instruct subD_imm(regD dst, immD con) %{
18003 predicate(UseAVX == 0);
18004 match(Set dst (SubD dst con));
18005 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18006 ins_cost(150);
18007 ins_encode %{
18008 __ subsd($dst$$XMMRegister, $constantaddress($con));
18009 %}
18010 ins_pipe(pipe_slow);
18011 %}
18012
18013 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
18014 predicate(UseAVX > 0);
18015 match(Set dst (SubD src1 src2));
18016
18017 format %{ "vsubsd $dst, $src1, $src2" %}
18018 ins_cost(150);
18019 ins_encode %{
18020 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18021 %}
18022 ins_pipe(pipe_slow);
18023 %}
18024
18025 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
18026 predicate(UseAVX > 0);
18027 match(Set dst (SubD src1 (LoadD src2)));
18028
18029 format %{ "vsubsd $dst, $src1, $src2" %}
18030 ins_cost(150);
18031 ins_encode %{
18032 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18033 %}
18034 ins_pipe(pipe_slow);
18035 %}
18036
18037 instruct subD_reg_imm(regD dst, regD src, immD con) %{
18038 predicate(UseAVX > 0);
18039 match(Set dst (SubD src con));
18040
18041 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18042 ins_cost(150);
18043 ins_encode %{
18044 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18045 %}
18046 ins_pipe(pipe_slow);
18047 %}
18048
18049 instruct mulF_reg(regF dst, regF src) %{
18050 predicate(UseAVX == 0);
18051 match(Set dst (MulF dst src));
18052
18053 format %{ "mulss $dst, $src" %}
18054 ins_cost(150);
18055 ins_encode %{
18056 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
18057 %}
18058 ins_pipe(pipe_slow);
18059 %}
18060
18061 instruct mulF_mem(regF dst, memory src) %{
18062 predicate(UseAVX == 0);
18063 match(Set dst (MulF dst (LoadF src)));
18064
18065 format %{ "mulss $dst, $src" %}
18066 ins_cost(150);
18067 ins_encode %{
18068 __ mulss($dst$$XMMRegister, $src$$Address);
18069 %}
18070 ins_pipe(pipe_slow);
18071 %}
18072
18073 instruct mulF_imm(regF dst, immF con) %{
18074 predicate(UseAVX == 0);
18075 match(Set dst (MulF dst con));
18076 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18077 ins_cost(150);
18078 ins_encode %{
18079 __ mulss($dst$$XMMRegister, $constantaddress($con));
18080 %}
18081 ins_pipe(pipe_slow);
18082 %}
18083
18084 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
18085 predicate(UseAVX > 0);
18086 match(Set dst (MulF src1 src2));
18087
18088 format %{ "vmulss $dst, $src1, $src2" %}
18089 ins_cost(150);
18090 ins_encode %{
18091 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18092 %}
18093 ins_pipe(pipe_slow);
18094 %}
18095
18096 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
18097 predicate(UseAVX > 0);
18098 match(Set dst (MulF src1 (LoadF src2)));
18099
18100 format %{ "vmulss $dst, $src1, $src2" %}
18101 ins_cost(150);
18102 ins_encode %{
18103 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18104 %}
18105 ins_pipe(pipe_slow);
18106 %}
18107
18108 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
18109 predicate(UseAVX > 0);
18110 match(Set dst (MulF src con));
18111
18112 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18113 ins_cost(150);
18114 ins_encode %{
18115 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18116 %}
18117 ins_pipe(pipe_slow);
18118 %}
18119
18120 instruct mulD_reg(regD dst, regD src) %{
18121 predicate(UseAVX == 0);
18122 match(Set dst (MulD dst src));
18123
18124 format %{ "mulsd $dst, $src" %}
18125 ins_cost(150);
18126 ins_encode %{
18127 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
18128 %}
18129 ins_pipe(pipe_slow);
18130 %}
18131
18132 instruct mulD_mem(regD dst, memory src) %{
18133 predicate(UseAVX == 0);
18134 match(Set dst (MulD dst (LoadD src)));
18135
18136 format %{ "mulsd $dst, $src" %}
18137 ins_cost(150);
18138 ins_encode %{
18139 __ mulsd($dst$$XMMRegister, $src$$Address);
18140 %}
18141 ins_pipe(pipe_slow);
18142 %}
18143
18144 instruct mulD_imm(regD dst, immD con) %{
18145 predicate(UseAVX == 0);
18146 match(Set dst (MulD dst con));
18147 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18148 ins_cost(150);
18149 ins_encode %{
18150 __ mulsd($dst$$XMMRegister, $constantaddress($con));
18151 %}
18152 ins_pipe(pipe_slow);
18153 %}
18154
18155 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
18156 predicate(UseAVX > 0);
18157 match(Set dst (MulD src1 src2));
18158
18159 format %{ "vmulsd $dst, $src1, $src2" %}
18160 ins_cost(150);
18161 ins_encode %{
18162 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18163 %}
18164 ins_pipe(pipe_slow);
18165 %}
18166
18167 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18168 predicate(UseAVX > 0);
18169 match(Set dst (MulD src1 (LoadD src2)));
18170
18171 format %{ "vmulsd $dst, $src1, $src2" %}
18172 ins_cost(150);
18173 ins_encode %{
18174 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18175 %}
18176 ins_pipe(pipe_slow);
18177 %}
18178
18179 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18180 predicate(UseAVX > 0);
18181 match(Set dst (MulD src con));
18182
18183 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18184 ins_cost(150);
18185 ins_encode %{
18186 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18187 %}
18188 ins_pipe(pipe_slow);
18189 %}
18190
18191 instruct divF_reg(regF dst, regF src) %{
18192 predicate(UseAVX == 0);
18193 match(Set dst (DivF dst src));
18194
18195 format %{ "divss $dst, $src" %}
18196 ins_cost(150);
18197 ins_encode %{
18198 __ divss($dst$$XMMRegister, $src$$XMMRegister);
18199 %}
18200 ins_pipe(pipe_slow);
18201 %}
18202
18203 instruct divF_mem(regF dst, memory src) %{
18204 predicate(UseAVX == 0);
18205 match(Set dst (DivF dst (LoadF src)));
18206
18207 format %{ "divss $dst, $src" %}
18208 ins_cost(150);
18209 ins_encode %{
18210 __ divss($dst$$XMMRegister, $src$$Address);
18211 %}
18212 ins_pipe(pipe_slow);
18213 %}
18214
18215 instruct divF_imm(regF dst, immF con) %{
18216 predicate(UseAVX == 0);
18217 match(Set dst (DivF dst con));
18218 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18219 ins_cost(150);
18220 ins_encode %{
18221 __ divss($dst$$XMMRegister, $constantaddress($con));
18222 %}
18223 ins_pipe(pipe_slow);
18224 %}
18225
18226 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18227 predicate(UseAVX > 0);
18228 match(Set dst (DivF src1 src2));
18229
18230 format %{ "vdivss $dst, $src1, $src2" %}
18231 ins_cost(150);
18232 ins_encode %{
18233 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18234 %}
18235 ins_pipe(pipe_slow);
18236 %}
18237
18238 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18239 predicate(UseAVX > 0);
18240 match(Set dst (DivF src1 (LoadF src2)));
18241
18242 format %{ "vdivss $dst, $src1, $src2" %}
18243 ins_cost(150);
18244 ins_encode %{
18245 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18246 %}
18247 ins_pipe(pipe_slow);
18248 %}
18249
18250 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18251 predicate(UseAVX > 0);
18252 match(Set dst (DivF src con));
18253
18254 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18255 ins_cost(150);
18256 ins_encode %{
18257 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18258 %}
18259 ins_pipe(pipe_slow);
18260 %}
18261
18262 instruct divD_reg(regD dst, regD src) %{
18263 predicate(UseAVX == 0);
18264 match(Set dst (DivD dst src));
18265
18266 format %{ "divsd $dst, $src" %}
18267 ins_cost(150);
18268 ins_encode %{
18269 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18270 %}
18271 ins_pipe(pipe_slow);
18272 %}
18273
18274 instruct divD_mem(regD dst, memory src) %{
18275 predicate(UseAVX == 0);
18276 match(Set dst (DivD dst (LoadD src)));
18277
18278 format %{ "divsd $dst, $src" %}
18279 ins_cost(150);
18280 ins_encode %{
18281 __ divsd($dst$$XMMRegister, $src$$Address);
18282 %}
18283 ins_pipe(pipe_slow);
18284 %}
18285
18286 instruct divD_imm(regD dst, immD con) %{
18287 predicate(UseAVX == 0);
18288 match(Set dst (DivD dst con));
18289 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18290 ins_cost(150);
18291 ins_encode %{
18292 __ divsd($dst$$XMMRegister, $constantaddress($con));
18293 %}
18294 ins_pipe(pipe_slow);
18295 %}
18296
18297 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18298 predicate(UseAVX > 0);
18299 match(Set dst (DivD src1 src2));
18300
18301 format %{ "vdivsd $dst, $src1, $src2" %}
18302 ins_cost(150);
18303 ins_encode %{
18304 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18305 %}
18306 ins_pipe(pipe_slow);
18307 %}
18308
18309 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18310 predicate(UseAVX > 0);
18311 match(Set dst (DivD src1 (LoadD src2)));
18312
18313 format %{ "vdivsd $dst, $src1, $src2" %}
18314 ins_cost(150);
18315 ins_encode %{
18316 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18317 %}
18318 ins_pipe(pipe_slow);
18319 %}
18320
18321 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18322 predicate(UseAVX > 0);
18323 match(Set dst (DivD src con));
18324
18325 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18326 ins_cost(150);
18327 ins_encode %{
18328 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18329 %}
18330 ins_pipe(pipe_slow);
18331 %}
18332
18333 instruct absF_reg(regF dst) %{
18334 predicate(UseAVX == 0);
18335 match(Set dst (AbsF dst));
18336 ins_cost(150);
18337 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18338 ins_encode %{
18339 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18340 %}
18341 ins_pipe(pipe_slow);
18342 %}
18343
18344 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18345 predicate(UseAVX > 0);
18346 match(Set dst (AbsF src));
18347 ins_cost(150);
18348 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18349 ins_encode %{
18350 int vlen_enc = Assembler::AVX_128bit;
18351 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18352 ExternalAddress(float_signmask()), vlen_enc);
18353 %}
18354 ins_pipe(pipe_slow);
18355 %}
18356
18357 instruct absD_reg(regD dst) %{
18358 predicate(UseAVX == 0);
18359 match(Set dst (AbsD dst));
18360 ins_cost(150);
18361 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18362 "# abs double by sign masking" %}
18363 ins_encode %{
18364 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18365 %}
18366 ins_pipe(pipe_slow);
18367 %}
18368
18369 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18370 predicate(UseAVX > 0);
18371 match(Set dst (AbsD src));
18372 ins_cost(150);
18373 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18374 "# abs double by sign masking" %}
18375 ins_encode %{
18376 int vlen_enc = Assembler::AVX_128bit;
18377 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18378 ExternalAddress(double_signmask()), vlen_enc);
18379 %}
18380 ins_pipe(pipe_slow);
18381 %}
18382
18383 instruct negF_reg(regF dst) %{
18384 predicate(UseAVX == 0);
18385 match(Set dst (NegF dst));
18386 ins_cost(150);
18387 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18388 ins_encode %{
18389 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18390 %}
18391 ins_pipe(pipe_slow);
18392 %}
18393
18394 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18395 predicate(UseAVX > 0);
18396 match(Set dst (NegF src));
18397 ins_cost(150);
18398 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18399 ins_encode %{
18400 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18401 ExternalAddress(float_signflip()));
18402 %}
18403 ins_pipe(pipe_slow);
18404 %}
18405
18406 instruct negD_reg(regD dst) %{
18407 predicate(UseAVX == 0);
18408 match(Set dst (NegD dst));
18409 ins_cost(150);
18410 format %{ "xorpd $dst, [0x8000000000000000]\t"
18411 "# neg double by sign flipping" %}
18412 ins_encode %{
18413 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18414 %}
18415 ins_pipe(pipe_slow);
18416 %}
18417
18418 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18419 predicate(UseAVX > 0);
18420 match(Set dst (NegD src));
18421 ins_cost(150);
18422 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18423 "# neg double by sign flipping" %}
18424 ins_encode %{
18425 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18426 ExternalAddress(double_signflip()));
18427 %}
18428 ins_pipe(pipe_slow);
18429 %}
18430
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
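//
// Background (a brief sketch): sqrtss writes only dst[31:0] and leaves the upper
// bits of dst unchanged, so
//
//   sqrtss xmm0, xmm1
//
// still carries a dependency on the previous contents of xmm0. Matching only the
// dst == src form keeps that dependency a true one instead of a false one.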
18433 instruct sqrtF_reg(regF dst) %{
18434 match(Set dst (SqrtF dst));
18435 format %{ "sqrtss $dst, $dst" %}
18436 ins_encode %{
18437 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18438 %}
18439 ins_pipe(pipe_slow);
18440 %}
18441
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18444 instruct sqrtD_reg(regD dst) %{
18445 match(Set dst (SqrtD dst));
18446 format %{ "sqrtsd $dst, $dst" %}
18447 ins_encode %{
18448 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18449 %}
18450 ins_pipe(pipe_slow);
18451 %}
18452
18453 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18454 effect(TEMP tmp);
18455 match(Set dst (ConvF2HF src));
18456 ins_cost(125);
18457 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18458 ins_encode %{
18459 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18460 %}
18461 ins_pipe( pipe_slow );
18462 %}
18463
18464 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18465 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18466 effect(TEMP ktmp, TEMP rtmp);
18467 match(Set mem (StoreC mem (ConvF2HF src)));
18468 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18469 ins_encode %{
18470 __ movl($rtmp$$Register, 0x1);
18471 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18472 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18473 %}
18474 ins_pipe( pipe_slow );
18475 %}
18476
18477 instruct vconvF2HF(vec dst, vec src) %{
18478 match(Set dst (VectorCastF2HF src));
18479 format %{ "vector_conv_F2HF $dst $src" %}
18480 ins_encode %{
18481 int vlen_enc = vector_length_encoding(this, $src);
18482 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18483 %}
18484 ins_pipe( pipe_slow );
18485 %}
18486
18487 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18488 predicate(n->as_StoreVector()->memory_size() >= 16);
18489 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18490 format %{ "vcvtps2ph $mem,$src" %}
18491 ins_encode %{
18492 int vlen_enc = vector_length_encoding(this, $src);
18493 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18494 %}
18495 ins_pipe( pipe_slow );
18496 %}
18497
18498 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18499 match(Set dst (ConvHF2F src));
18500 format %{ "vcvtph2ps $dst,$src" %}
18501 ins_encode %{
18502 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18503 %}
18504 ins_pipe( pipe_slow );
18505 %}
18506
18507 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18508 match(Set dst (VectorCastHF2F (LoadVector mem)));
18509 format %{ "vcvtph2ps $dst,$mem" %}
18510 ins_encode %{
18511 int vlen_enc = vector_length_encoding(this);
18512 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18513 %}
18514 ins_pipe( pipe_slow );
18515 %}
18516
18517 instruct vconvHF2F(vec dst, vec src) %{
18518 match(Set dst (VectorCastHF2F src));
18519 ins_cost(125);
18520 format %{ "vector_conv_HF2F $dst,$src" %}
18521 ins_encode %{
18522 int vlen_enc = vector_length_encoding(this);
18523 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18524 %}
18525 ins_pipe( pipe_slow );
18526 %}
18527
18528 // ---------------------------------------- VectorReinterpret ------------------------------------
18529 instruct reinterpret_mask(kReg dst) %{
18530 predicate(n->bottom_type()->isa_vectmask() &&
18531 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18532 match(Set dst (VectorReinterpret dst));
18533 ins_cost(125);
18534 format %{ "vector_reinterpret $dst\t!" %}
18535 ins_encode %{
18536 // empty
18537 %}
18538 ins_pipe( pipe_slow );
18539 %}
18540
18541 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18542 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18543 n->bottom_type()->isa_vectmask() &&
18544 n->in(1)->bottom_type()->isa_vectmask() &&
18545 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18546 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18547 match(Set dst (VectorReinterpret src));
18548 effect(TEMP xtmp);
18549 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18550 ins_encode %{
18551 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18552 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18553 assert(src_sz == dst_sz , "src and dst size mismatch");
18554 int vlen_enc = vector_length_encoding(src_sz);
18555 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18556 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18557 %}
18558 ins_pipe( pipe_slow );
18559 %}
18560
18561 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18562 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18563 n->bottom_type()->isa_vectmask() &&
18564 n->in(1)->bottom_type()->isa_vectmask() &&
18565 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18566 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18567 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18568 match(Set dst (VectorReinterpret src));
18569 effect(TEMP xtmp);
18570 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18571 ins_encode %{
18572 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18573 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18574 assert(src_sz == dst_sz , "src and dst size mismatch");
18575 int vlen_enc = vector_length_encoding(src_sz);
18576 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18577 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18578 %}
18579 ins_pipe( pipe_slow );
18580 %}
18581
18582 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18583 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18584 n->bottom_type()->isa_vectmask() &&
18585 n->in(1)->bottom_type()->isa_vectmask() &&
18586 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18587 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18588 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18589 match(Set dst (VectorReinterpret src));
18590 effect(TEMP xtmp);
18591 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18592 ins_encode %{
18593 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18594 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18595 assert(src_sz == dst_sz , "src and dst size mismatch");
18596 int vlen_enc = vector_length_encoding(src_sz);
18597 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18598 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18599 %}
18600 ins_pipe( pipe_slow );
18601 %}
18602
18603 instruct reinterpret(vec dst) %{
18604 predicate(!n->bottom_type()->isa_vectmask() &&
18605 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18606 match(Set dst (VectorReinterpret dst));
18607 ins_cost(125);
18608 format %{ "vector_reinterpret $dst\t!" %}
18609 ins_encode %{
18610 // empty
18611 %}
18612 ins_pipe( pipe_slow );
18613 %}
18614
18615 instruct reinterpret_expand(vec dst, vec src) %{
18616 predicate(UseAVX == 0 &&
18617 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18618 match(Set dst (VectorReinterpret src));
18619 ins_cost(125);
18620 effect(TEMP dst);
18621 format %{ "vector_reinterpret_expand $dst,$src" %}
18622 ins_encode %{
18623 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18624 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18625
18626 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18627 if (src_vlen_in_bytes == 4) {
18628 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18629 } else {
18630 assert(src_vlen_in_bytes == 8, "");
18631 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18632 }
18633 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18634 %}
18635 ins_pipe( pipe_slow );
18636 %}
18637
18638 instruct vreinterpret_expand4(legVec dst, vec src) %{
18639 predicate(UseAVX > 0 &&
18640 !n->bottom_type()->isa_vectmask() &&
18641 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18642 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18643 match(Set dst (VectorReinterpret src));
18644 ins_cost(125);
18645 format %{ "vector_reinterpret_expand $dst,$src" %}
18646 ins_encode %{
18647 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18648 %}
18649 ins_pipe( pipe_slow );
18650 %}
18651
18652
18653 instruct vreinterpret_expand(legVec dst, vec src) %{
18654 predicate(UseAVX > 0 &&
18655 !n->bottom_type()->isa_vectmask() &&
18656 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18657 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18658 match(Set dst (VectorReinterpret src));
18659 ins_cost(125);
18660 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18661 ins_encode %{
18662 switch (Matcher::vector_length_in_bytes(this, $src)) {
18663 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18664 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18665 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18666 default: ShouldNotReachHere();
18667 }
18668 %}
18669 ins_pipe( pipe_slow );
18670 %}
18671
18672 instruct reinterpret_shrink(vec dst, legVec src) %{
18673 predicate(!n->bottom_type()->isa_vectmask() &&
18674 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18675 match(Set dst (VectorReinterpret src));
18676 ins_cost(125);
18677 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18678 ins_encode %{
18679 switch (Matcher::vector_length_in_bytes(this)) {
18680 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18681 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18682 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18683 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18684 default: ShouldNotReachHere();
18685 }
18686 %}
18687 ins_pipe( pipe_slow );
18688 %}
18689
18690 // ----------------------------------------------------------------------------------------------------
18691
18692 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18693 match(Set dst (RoundDoubleMode src rmode));
18694 format %{ "roundsd $dst,$src" %}
18695 ins_cost(150);
18696 ins_encode %{
18697 assert(UseSSE >= 4, "required");
18698 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18699 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18700 }
18701 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18702 %}
18703 ins_pipe(pipe_slow);
18704 %}
18705
18706 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18707 match(Set dst (RoundDoubleMode con rmode));
18708 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18709 ins_cost(150);
18710 ins_encode %{
18711 assert(UseSSE >= 4, "required");
18712 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18713 %}
18714 ins_pipe(pipe_slow);
18715 %}
18716
18717 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18718 predicate(Matcher::vector_length(n) < 8);
18719 match(Set dst (RoundDoubleModeV src rmode));
18720 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18721 ins_encode %{
18722 assert(UseAVX > 0, "required");
18723 int vlen_enc = vector_length_encoding(this);
18724 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18725 %}
18726 ins_pipe( pipe_slow );
18727 %}
18728
18729 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18730 predicate(Matcher::vector_length(n) == 8);
18731 match(Set dst (RoundDoubleModeV src rmode));
18732 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18733 ins_encode %{
18734 assert(UseAVX > 2, "required");
18735 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18736 %}
18737 ins_pipe( pipe_slow );
18738 %}
18739
18740 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18741 predicate(Matcher::vector_length(n) < 8);
18742 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18743 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18744 ins_encode %{
18745 assert(UseAVX > 0, "required");
18746 int vlen_enc = vector_length_encoding(this);
18747 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18748 %}
18749 ins_pipe( pipe_slow );
18750 %}
18751
18752 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18753 predicate(Matcher::vector_length(n) == 8);
18754 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18755 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18756 ins_encode %{
18757 assert(UseAVX > 2, "required");
18758 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18759 %}
18760 ins_pipe( pipe_slow );
18761 %}
18762
18763 instruct onspinwait() %{
18764 match(OnSpinWait);
18765 ins_cost(200);
18766
18767 format %{
18768 $$template
18769 $$emit$$"pause\t! membar_onspinwait"
18770 %}
18771 ins_encode %{
18772 __ pause();
18773 %}
18774 ins_pipe(pipe_slow);
18775 %}
18776
18777 // a * b + c
18778 instruct fmaD_reg(regD a, regD b, regD c) %{
18779 match(Set c (FmaD c (Binary a b)));
18780 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18781 ins_cost(150);
18782 ins_encode %{
18783 assert(UseFMA, "Needs FMA instructions support.");
18784 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18785 %}
18786 ins_pipe( pipe_slow );
18787 %}
18788
18789 // a * b + c
18790 instruct fmaF_reg(regF a, regF b, regF c) %{
18791 match(Set c (FmaF c (Binary a b)));
18792 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18793 ins_cost(150);
18794 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18796 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18797 %}
18798 ins_pipe( pipe_slow );
18799 %}
18800
18801 // ====================VECTOR INSTRUCTIONS=====================================
18802
18803 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18804 instruct MoveVec2Leg(legVec dst, vec src) %{
18805 match(Set dst src);
18806 format %{ "" %}
18807 ins_encode %{
18808 ShouldNotReachHere();
18809 %}
18810 ins_pipe( fpu_reg_reg );
18811 %}
18812
18813 instruct MoveLeg2Vec(vec dst, legVec src) %{
18814 match(Set dst src);
18815 format %{ "" %}
18816 ins_encode %{
18817 ShouldNotReachHere();
18818 %}
18819 ins_pipe( fpu_reg_reg );
18820 %}
18821
18822 // ============================================================================
18823
18824 // Load vectors generic operand pattern
18825 instruct loadV(vec dst, memory mem) %{
18826 match(Set dst (LoadVector mem));
18827 ins_cost(125);
18828 format %{ "load_vector $dst,$mem" %}
18829 ins_encode %{
18830 BasicType bt = Matcher::vector_element_basic_type(this);
18831 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18832 %}
18833 ins_pipe( pipe_slow );
18834 %}
18835
18836 // Store vectors generic operand pattern.
18837 instruct storeV(memory mem, vec src) %{
18838 match(Set mem (StoreVector mem src));
18839 ins_cost(145);
18840 format %{ "store_vector $mem,$src\n\t" %}
18841 ins_encode %{
18842 switch (Matcher::vector_length_in_bytes(this, $src)) {
18843 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18844 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18845 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18846 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18847 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18848 default: ShouldNotReachHere();
18849 }
18850 %}
18851 ins_pipe( pipe_slow );
18852 %}
18853
18854 // ---------------------------------------- Gather ------------------------------------
18855
18856 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18857
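// AVX2 gather (non-subword elements, <= 32-byte vectors): the hardware gather always
// takes a vector mask, so an all-ones mask is materialized with vpcmpeqd; the gather
// clears mask lanes as elements are loaded, which is why the mask is a TEMP.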
18858 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18859 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18860 Matcher::vector_length_in_bytes(n) <= 32);
18861 match(Set dst (LoadVectorGather mem idx));
18862 effect(TEMP dst, TEMP tmp, TEMP mask);
18863 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18864 ins_encode %{
18865 int vlen_enc = vector_length_encoding(this);
18866 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18867 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18868 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18869 __ lea($tmp$$Register, $mem$$Address);
18870 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18871 %}
18872 ins_pipe( pipe_slow );
18873 %}
18874
18875
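// AVX-512 gather: an all-ones opmask is produced with kxnorwl because the EVEX-encoded
// gather is always predicated, even for the unmasked IR node.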
18876 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18877 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18878 !is_subword_type(Matcher::vector_element_basic_type(n)));
18879 match(Set dst (LoadVectorGather mem idx));
18880 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18882 ins_encode %{
18883 int vlen_enc = vector_length_encoding(this);
18884 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18885 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18886 __ lea($tmp$$Register, $mem$$Address);
18887 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18888 %}
18889 ins_pipe( pipe_slow );
18890 %}
18891
18892 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18893 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18894 !is_subword_type(Matcher::vector_element_basic_type(n)));
18895 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18896 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18898 ins_encode %{
18899 assert(UseAVX > 2, "sanity");
18900 int vlen_enc = vector_length_encoding(this);
18901 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18902 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register used
    // for predication, the mask operand is first copied into a temporary opmask register.
18905 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18906 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18907 __ lea($tmp$$Register, $mem$$Address);
18908 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18909 %}
18910 ins_pipe( pipe_slow );
18911 %}
18912
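// Subword (byte/short) gathers: x86 has no byte/short gather instructions, so the
// elements are assembled with scalar loads via the vgather8b/vgather_subword helpers.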
18913 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18914 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18915 match(Set dst (LoadVectorGather mem idx_base));
18916 effect(TEMP tmp, TEMP rtmp);
18917 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18918 ins_encode %{
18919 int vlen_enc = vector_length_encoding(this);
18920 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18921 __ lea($tmp$$Register, $mem$$Address);
18922 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18923 %}
18924 ins_pipe( pipe_slow );
18925 %}
18926
18927 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18928 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18929 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18930 match(Set dst (LoadVectorGather mem idx_base));
18931 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18932 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18933 ins_encode %{
18934 int vlen_enc = vector_length_encoding(this);
18935 int vector_len = Matcher::vector_length(this);
18936 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18937 __ lea($tmp$$Register, $mem$$Address);
18938 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18939 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18940 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18941 %}
18942 ins_pipe( pipe_slow );
18943 %}
18944
18945 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18946 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18947 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18948 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18949 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18950 ins_encode %{
18951 int vlen_enc = vector_length_encoding(this);
18952 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18953 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18954 __ lea($tmp$$Register, $mem$$Address);
18955 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18956 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18957 %}
18958 ins_pipe( pipe_slow );
18959 %}
18960
18961 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18962 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18963 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18964 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18965 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18966 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18967 ins_encode %{
18968 int vlen_enc = vector_length_encoding(this);
18969 int vector_len = Matcher::vector_length(this);
18970 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18971 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18972 __ lea($tmp$$Register, $mem$$Address);
18973 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18974 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18975 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18976 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18977 %}
18978 ins_pipe( pipe_slow );
18979 %}
18980
18981 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18982 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18983 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18984 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18985 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18986 ins_encode %{
18987 int vlen_enc = vector_length_encoding(this);
18988 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18989 __ lea($tmp$$Register, $mem$$Address);
18990 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
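    // vpmovmskb yields one mask bit per byte; for shorts each element spans two bytes,
    // so keep one bit per element by extracting with the 0x55555555 pattern.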
18991 if (elem_bt == T_SHORT) {
18992 __ movl($mask_idx$$Register, 0x55555555);
18993 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18994 }
18995 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18996 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18997 %}
18998 ins_pipe( pipe_slow );
18999 %}
19000
19001 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
19002 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
19003 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
19004 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
19005 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
19006 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
19007 ins_encode %{
19008 int vlen_enc = vector_length_encoding(this);
19009 int vector_len = Matcher::vector_length(this);
19010 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19011 __ lea($tmp$$Register, $mem$$Address);
19012 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
19013 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
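    // vpmovmskb yields one mask bit per byte; for shorts each element spans two bytes,
    // so keep one bit per element by extracting with the 0x55555555 pattern.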
19014 if (elem_bt == T_SHORT) {
19015 __ movl($mask_idx$$Register, 0x55555555);
19016 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
19017 }
19018 __ xorl($mask_idx$$Register, $mask_idx$$Register);
19019 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
19020 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
19021 %}
19022 ins_pipe( pipe_slow );
19023 %}
19024
19025 // ====================Scatter=======================================
19026
19027 // Scatter INT, LONG, FLOAT, DOUBLE
19028
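// Scatter is only available with AVX-512 (EVEX-encoded vscatter*), hence the
// UseAVX > 2 predicate and the opmask temporary holding an all-ones predicate.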
19029 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
19030 predicate(UseAVX > 2);
19031 match(Set mem (StoreVectorScatter mem (Binary src idx)));
19032 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
19034 ins_encode %{
19035 int vlen_enc = vector_length_encoding(this, $src);
19036 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19037
19038 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19039 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19040
19041 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
19042 __ lea($tmp$$Register, $mem$$Address);
19043 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19044 %}
19045 ins_pipe( pipe_slow );
19046 %}
19047
19048 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
19049 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
19050 effect(TEMP tmp, TEMP ktmp);
19051 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
19052 ins_encode %{
19053 int vlen_enc = vector_length_encoding(this, $src);
19054 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19055 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19056 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is first copied into a temporary opmask register.
19059 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19060 __ lea($tmp$$Register, $mem$$Address);
19061 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19062 %}
19063 ins_pipe( pipe_slow );
19064 %}
19065
19066 // ====================REPLICATE=======================================
19067
19068 // Replicate byte scalar to be vector
19069 instruct vReplB_reg(vec dst, rRegI src) %{
19070 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19071 match(Set dst (Replicate src));
19072 format %{ "replicateB $dst,$src" %}
19073 ins_encode %{
19074 uint vlen = Matcher::vector_length(this);
19075 if (UseAVX >= 2) {
19076 int vlen_enc = vector_length_encoding(this);
19077 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19078 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19079 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19080 } else {
19081 __ movdl($dst$$XMMRegister, $src$$Register);
19082 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19083 }
19084 } else {
19085 assert(UseAVX < 2, "");
19086 __ movdl($dst$$XMMRegister, $src$$Register);
19087 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19088 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19089 if (vlen >= 16) {
19090 assert(vlen == 16, "");
19091 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19092 }
19093 }
19094 %}
19095 ins_pipe( pipe_slow );
19096 %}
19097
19098 instruct ReplB_mem(vec dst, memory mem) %{
19099 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19100 match(Set dst (Replicate (LoadB mem)));
19101 format %{ "replicateB $dst,$mem" %}
19102 ins_encode %{
19103 int vlen_enc = vector_length_encoding(this);
19104 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19105 %}
19106 ins_pipe( pipe_slow );
19107 %}
19108
19109 // ====================ReplicateS=======================================
19110
19111 instruct vReplS_reg(vec dst, rRegI src) %{
19112 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19113 match(Set dst (Replicate src));
19114 format %{ "replicateS $dst,$src" %}
19115 ins_encode %{
19116 uint vlen = Matcher::vector_length(this);
19117 int vlen_enc = vector_length_encoding(this);
19118 if (UseAVX >= 2) {
19119 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19120 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19121 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19122 } else {
19123 __ movdl($dst$$XMMRegister, $src$$Register);
19124 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19125 }
19126 } else {
19127 assert(UseAVX < 2, "");
19128 __ movdl($dst$$XMMRegister, $src$$Register);
19129 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19130 if (vlen >= 8) {
19131 assert(vlen == 8, "");
19132 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19133 }
19134 }
19135 %}
19136 ins_pipe( pipe_slow );
19137 %}
19138
19139 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19140 match(Set dst (Replicate con));
19141 effect(TEMP rtmp);
19142 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
19143 ins_encode %{
19144 int vlen_enc = vector_length_encoding(this);
19145 BasicType bt = Matcher::vector_element_basic_type(this);
19146 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19147 __ movl($rtmp$$Register, $con$$constant);
19148 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19149 %}
19150 ins_pipe( pipe_slow );
19151 %}
19152
19153 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19154 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19155 match(Set dst (Replicate src));
19156 effect(TEMP rtmp);
19157 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
19158 ins_encode %{
19159 int vlen_enc = vector_length_encoding(this);
19160 __ evmovw($rtmp$$Register, $src$$XMMRegister);
19161 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19162 %}
19163 ins_pipe( pipe_slow );
19164 %}
19165
19166 instruct ReplS_mem(vec dst, memory mem) %{
19167 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19168 match(Set dst (Replicate (LoadS mem)));
19169 format %{ "replicateS $dst,$mem" %}
19170 ins_encode %{
19171 int vlen_enc = vector_length_encoding(this);
19172 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19173 %}
19174 ins_pipe( pipe_slow );
19175 %}
19176
19177 // ====================ReplicateI=======================================
19178
19179 instruct ReplI_reg(vec dst, rRegI src) %{
19180 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19181 match(Set dst (Replicate src));
19182 format %{ "replicateI $dst,$src" %}
19183 ins_encode %{
19184 uint vlen = Matcher::vector_length(this);
19185 int vlen_enc = vector_length_encoding(this);
19186 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19187 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19188 } else if (VM_Version::supports_avx2()) {
19189 __ movdl($dst$$XMMRegister, $src$$Register);
19190 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19191 } else {
19192 __ movdl($dst$$XMMRegister, $src$$Register);
19193 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19194 }
19195 %}
19196 ins_pipe( pipe_slow );
19197 %}
19198
19199 instruct ReplI_mem(vec dst, memory mem) %{
19200 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19201 match(Set dst (Replicate (LoadI mem)));
19202 format %{ "replicateI $dst,$mem" %}
19203 ins_encode %{
19204 int vlen_enc = vector_length_encoding(this);
19205 if (VM_Version::supports_avx2()) {
19206 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19207 } else if (VM_Version::supports_avx()) {
19208 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19209 } else {
19210 __ movdl($dst$$XMMRegister, $mem$$Address);
19211 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19212 }
19213 %}
19214 ins_pipe( pipe_slow );
19215 %}
19216
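// Replicate an integral immediate by materializing the replicated pattern in the
// constant table: the entry is 4 bytes with AVX, 8 with SSE3, 16 otherwise, and
// load_constant_vector broadcasts it to the full vector length.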
19217 instruct ReplI_imm(vec dst, immI con) %{
19218 predicate(Matcher::is_non_long_integral_vector(n));
19219 match(Set dst (Replicate con));
19220 format %{ "replicateI $dst,$con" %}
19221 ins_encode %{
19222 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19223 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19224 type2aelembytes(Matcher::vector_element_basic_type(this))));
19225 BasicType bt = Matcher::vector_element_basic_type(this);
19226 int vlen = Matcher::vector_length_in_bytes(this);
19227 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19228 %}
19229 ins_pipe( pipe_slow );
19230 %}
19231
19232 // Replicate scalar zero to be vector
19233 instruct ReplI_zero(vec dst, immI_0 zero) %{
19234 predicate(Matcher::is_non_long_integral_vector(n));
19235 match(Set dst (Replicate zero));
19236 format %{ "replicateI $dst,$zero" %}
19237 ins_encode %{
19238 int vlen_enc = vector_length_encoding(this);
19239 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19240 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19241 } else {
19242 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19243 }
19244 %}
19245 ins_pipe( fpu_reg_reg );
19246 %}
19247
19248 instruct ReplI_M1(vec dst, immI_M1 con) %{
19249 predicate(Matcher::is_non_long_integral_vector(n));
19250 match(Set dst (Replicate con));
19251 format %{ "vallones $dst" %}
19252 ins_encode %{
19253 int vector_len = vector_length_encoding(this);
19254 __ vallones($dst$$XMMRegister, vector_len);
19255 %}
19256 ins_pipe( pipe_slow );
19257 %}
19258
19259 // ====================ReplicateL=======================================
19260
19261 // Replicate long (8 byte) scalar to be vector
19262 instruct ReplL_reg(vec dst, rRegL src) %{
19263 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19264 match(Set dst (Replicate src));
19265 format %{ "replicateL $dst,$src" %}
19266 ins_encode %{
19267 int vlen = Matcher::vector_length(this);
19268 int vlen_enc = vector_length_encoding(this);
19269 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19270 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19271 } else if (VM_Version::supports_avx2()) {
19272 __ movdq($dst$$XMMRegister, $src$$Register);
19273 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19274 } else {
19275 __ movdq($dst$$XMMRegister, $src$$Register);
19276 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19277 }
19278 %}
19279 ins_pipe( pipe_slow );
19280 %}
19281
19282 instruct ReplL_mem(vec dst, memory mem) %{
19283 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19284 match(Set dst (Replicate (LoadL mem)));
19285 format %{ "replicateL $dst,$mem" %}
19286 ins_encode %{
19287 int vlen_enc = vector_length_encoding(this);
19288 if (VM_Version::supports_avx2()) {
19289 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19290 } else if (VM_Version::supports_sse3()) {
19291 __ movddup($dst$$XMMRegister, $mem$$Address);
19292 } else {
19293 __ movq($dst$$XMMRegister, $mem$$Address);
19294 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19295 }
19296 %}
19297 ins_pipe( pipe_slow );
19298 %}
19299
19300 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19301 instruct ReplL_imm(vec dst, immL con) %{
19302 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19303 match(Set dst (Replicate con));
19304 format %{ "replicateL $dst,$con" %}
19305 ins_encode %{
19306 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19307 int vlen = Matcher::vector_length_in_bytes(this);
19308 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19309 %}
19310 ins_pipe( pipe_slow );
19311 %}
19312
19313 instruct ReplL_zero(vec dst, immL0 zero) %{
19314 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19315 match(Set dst (Replicate zero));
19316 format %{ "replicateL $dst,$zero" %}
19317 ins_encode %{
19318 int vlen_enc = vector_length_encoding(this);
19319 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19320 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19321 } else {
19322 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19323 }
19324 %}
19325 ins_pipe( fpu_reg_reg );
19326 %}
19327
19328 instruct ReplL_M1(vec dst, immL_M1 con) %{
19329 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19330 match(Set dst (Replicate con));
19331 format %{ "vallones $dst" %}
19332 ins_encode %{
19333 int vector_len = vector_length_encoding(this);
19334 __ vallones($dst$$XMMRegister, vector_len);
19335 %}
19336 ins_pipe( pipe_slow );
19337 %}
19338
19339 // ====================ReplicateF=======================================
19340
19341 instruct vReplF_reg(vec dst, vlRegF src) %{
19342 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19343 match(Set dst (Replicate src));
19344 format %{ "replicateF $dst,$src" %}
19345 ins_encode %{
19346 uint vlen = Matcher::vector_length(this);
19347 int vlen_enc = vector_length_encoding(this);
19348 if (vlen <= 4) {
19349 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19350 } else if (VM_Version::supports_avx2()) {
19351 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19352 } else {
19353 assert(vlen == 8, "sanity");
19354 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19355 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19356 }
19357 %}
19358 ins_pipe( pipe_slow );
19359 %}
19360
19361 instruct ReplF_reg(vec dst, vlRegF src) %{
19362 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19363 match(Set dst (Replicate src));
19364 format %{ "replicateF $dst,$src" %}
19365 ins_encode %{
19366 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19367 %}
19368 ins_pipe( pipe_slow );
19369 %}
19370
19371 instruct ReplF_mem(vec dst, memory mem) %{
19372 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19373 match(Set dst (Replicate (LoadF mem)));
19374 format %{ "replicateF $dst,$mem" %}
19375 ins_encode %{
19376 int vlen_enc = vector_length_encoding(this);
19377 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19378 %}
19379 ins_pipe( pipe_slow );
19380 %}
19381
19382 // Replicate float scalar immediate to be vector by loading from const table.
19383 instruct ReplF_imm(vec dst, immF con) %{
19384 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19385 match(Set dst (Replicate con));
19386 format %{ "replicateF $dst,$con" %}
19387 ins_encode %{
19388 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19389 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19390 int vlen = Matcher::vector_length_in_bytes(this);
19391 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19392 %}
19393 ins_pipe( pipe_slow );
19394 %}
19395
19396 instruct ReplF_zero(vec dst, immF0 zero) %{
19397 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19398 match(Set dst (Replicate zero));
19399 format %{ "replicateF $dst,$zero" %}
19400 ins_encode %{
19401 int vlen_enc = vector_length_encoding(this);
19402 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19403 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19404 } else {
19405 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19406 }
19407 %}
19408 ins_pipe( fpu_reg_reg );
19409 %}
19410
19411 // ====================ReplicateD=======================================
19412
19413 // Replicate double (8 bytes) scalar to be vector
19414 instruct vReplD_reg(vec dst, vlRegD src) %{
19415 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19416 match(Set dst (Replicate src));
19417 format %{ "replicateD $dst,$src" %}
19418 ins_encode %{
19419 uint vlen = Matcher::vector_length(this);
19420 int vlen_enc = vector_length_encoding(this);
19421 if (vlen <= 2) {
19422 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19423 } else if (VM_Version::supports_avx2()) {
19424 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19425 } else {
19426 assert(vlen == 4, "sanity");
19427 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19428 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19429 }
19430 %}
19431 ins_pipe( pipe_slow );
19432 %}
19433
19434 instruct ReplD_reg(vec dst, vlRegD src) %{
19435 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19436 match(Set dst (Replicate src));
19437 format %{ "replicateD $dst,$src" %}
19438 ins_encode %{
19439 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19440 %}
19441 ins_pipe( pipe_slow );
19442 %}
19443
19444 instruct ReplD_mem(vec dst, memory mem) %{
19445 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19446 match(Set dst (Replicate (LoadD mem)));
19447 format %{ "replicateD $dst,$mem" %}
19448 ins_encode %{
19449 if (Matcher::vector_length(this) >= 4) {
19450 int vlen_enc = vector_length_encoding(this);
19451 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19452 } else {
19453 __ movddup($dst$$XMMRegister, $mem$$Address);
19454 }
19455 %}
19456 ins_pipe( pipe_slow );
19457 %}
19458
19459 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19460 instruct ReplD_imm(vec dst, immD con) %{
19461 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19462 match(Set dst (Replicate con));
19463 format %{ "replicateD $dst,$con" %}
19464 ins_encode %{
19465 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19466 int vlen = Matcher::vector_length_in_bytes(this);
19467 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19468 %}
19469 ins_pipe( pipe_slow );
19470 %}
19471
19472 instruct ReplD_zero(vec dst, immD0 zero) %{
19473 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19474 match(Set dst (Replicate zero));
19475 format %{ "replicateD $dst,$zero" %}
19476 ins_encode %{
19477 int vlen_enc = vector_length_encoding(this);
19478 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19479 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19480 } else {
19481 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19482 }
19483 %}
19484 ins_pipe( fpu_reg_reg );
19485 %}
19486
19487 // ====================VECTOR INSERT=======================================
19488
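// Scalar-into-vector inserts. For 256/512-bit vectors the 128-bit lane holding the
// target element is extracted, the scalar is inserted into that lane, and the lane is
// written back; see insert32/insert64 below.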
19489 instruct insert(vec dst, rRegI val, immU8 idx) %{
19490 predicate(Matcher::vector_length_in_bytes(n) < 32);
19491 match(Set dst (VectorInsert (Binary dst val) idx));
19492 format %{ "vector_insert $dst,$val,$idx" %}
19493 ins_encode %{
19494 assert(UseSSE >= 4, "required");
19495 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19496
19497 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19498
19499 assert(is_integral_type(elem_bt), "");
19500 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19501
19502 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19503 %}
19504 ins_pipe( pipe_slow );
19505 %}
19506
19507 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19508 predicate(Matcher::vector_length_in_bytes(n) == 32);
19509 match(Set dst (VectorInsert (Binary src val) idx));
19510 effect(TEMP vtmp);
19511 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19512 ins_encode %{
19513 int vlen_enc = Assembler::AVX_256bit;
19514 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19515 int elem_per_lane = 16/type2aelembytes(elem_bt);
19516 int log2epr = log2(elem_per_lane);
19517
19518 assert(is_integral_type(elem_bt), "sanity");
19519 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19520
19521 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19522 uint y_idx = ($idx$$constant >> log2epr) & 1;
19523 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19524 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19525 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19526 %}
19527 ins_pipe( pipe_slow );
19528 %}
19529
19530 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19531 predicate(Matcher::vector_length_in_bytes(n) == 64);
19532 match(Set dst (VectorInsert (Binary src val) idx));
19533 effect(TEMP vtmp);
19534 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19535 ins_encode %{
19536 assert(UseAVX > 2, "sanity");
19537
19538 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19539 int elem_per_lane = 16/type2aelembytes(elem_bt);
19540 int log2epr = log2(elem_per_lane);
19541
19542 assert(is_integral_type(elem_bt), "");
19543 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19544
19545 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19546 uint y_idx = ($idx$$constant >> log2epr) & 3;
19547 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19548 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19549 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19550 %}
19551 ins_pipe( pipe_slow );
19552 %}
19553
19554 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19555 predicate(Matcher::vector_length(n) == 2);
19556 match(Set dst (VectorInsert (Binary dst val) idx));
19557 format %{ "vector_insert $dst,$val,$idx" %}
19558 ins_encode %{
19559 assert(UseSSE >= 4, "required");
19560 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19561 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19562
19563 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19564 %}
19565 ins_pipe( pipe_slow );
19566 %}
19567
19568 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19569 predicate(Matcher::vector_length(n) == 4);
19570 match(Set dst (VectorInsert (Binary src val) idx));
19571 effect(TEMP vtmp);
19572 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19573 ins_encode %{
19574 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19575 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19576
19577 uint x_idx = $idx$$constant & right_n_bits(1);
19578 uint y_idx = ($idx$$constant >> 1) & 1;
19579 int vlen_enc = Assembler::AVX_256bit;
19580 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19581 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19582 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19583 %}
19584 ins_pipe( pipe_slow );
19585 %}
19586
19587 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19588 predicate(Matcher::vector_length(n) == 8);
19589 match(Set dst (VectorInsert (Binary src val) idx));
19590 effect(TEMP vtmp);
19591 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19592 ins_encode %{
19593 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19594 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19595
19596 uint x_idx = $idx$$constant & right_n_bits(1);
19597 uint y_idx = ($idx$$constant >> 1) & 3;
19598 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19599 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19600 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19601 %}
19602 ins_pipe( pipe_slow );
19603 %}
19604
19605 instruct insertF(vec dst, regF val, immU8 idx) %{
19606 predicate(Matcher::vector_length(n) < 8);
19607 match(Set dst (VectorInsert (Binary dst val) idx));
19608 format %{ "vector_insert $dst,$val,$idx" %}
19609 ins_encode %{
19610 assert(UseSSE >= 4, "sanity");
19611
19612 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19613 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19614
19615 uint x_idx = $idx$$constant & right_n_bits(2);
19616 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19617 %}
19618 ins_pipe( pipe_slow );
19619 %}
19620
19621 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19622 predicate(Matcher::vector_length(n) >= 8);
19623 match(Set dst (VectorInsert (Binary src val) idx));
19624 effect(TEMP vtmp);
19625 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19626 ins_encode %{
19627 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19628 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19629
19630 int vlen = Matcher::vector_length(this);
19631 uint x_idx = $idx$$constant & right_n_bits(2);
19632 if (vlen == 8) {
19633 uint y_idx = ($idx$$constant >> 2) & 1;
19634 int vlen_enc = Assembler::AVX_256bit;
19635 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19636 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19637 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19638 } else {
19639 assert(vlen == 16, "sanity");
19640 uint y_idx = ($idx$$constant >> 2) & 3;
19641 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19642 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19643 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19644 }
19645 %}
19646 ins_pipe( pipe_slow );
19647 %}
19648
19649 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19650 predicate(Matcher::vector_length(n) == 2);
19651 match(Set dst (VectorInsert (Binary dst val) idx));
19652 effect(TEMP tmp);
19653 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19654 ins_encode %{
19655 assert(UseSSE >= 4, "sanity");
19656 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19657 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19658
19659 __ movq($tmp$$Register, $val$$XMMRegister);
19660 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19661 %}
19662 ins_pipe( pipe_slow );
19663 %}
19664
19665 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19666 predicate(Matcher::vector_length(n) == 4);
19667 match(Set dst (VectorInsert (Binary src val) idx));
19668 effect(TEMP vtmp, TEMP tmp);
19669 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19670 ins_encode %{
19671 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19672 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19673
19674 uint x_idx = $idx$$constant & right_n_bits(1);
19675 uint y_idx = ($idx$$constant >> 1) & 1;
19676 int vlen_enc = Assembler::AVX_256bit;
19677 __ movq($tmp$$Register, $val$$XMMRegister);
19678 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19679 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19680 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19681 %}
19682 ins_pipe( pipe_slow );
19683 %}
19684
19685 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19686 predicate(Matcher::vector_length(n) == 8);
19687 match(Set dst (VectorInsert (Binary src val) idx));
19688 effect(TEMP tmp, TEMP vtmp);
19689 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19690 ins_encode %{
19691 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19692 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19693
19694 uint x_idx = $idx$$constant & right_n_bits(1);
19695 uint y_idx = ($idx$$constant >> 1) & 3;
19696 __ movq($tmp$$Register, $val$$XMMRegister);
19697 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19698 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19699 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19700 %}
19701 ins_pipe( pipe_slow );
19702 %}
19703
19704 // ====================REDUCTION ARITHMETIC=======================================
19705
19706 // =======================Int Reduction==========================================
19707
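// Reduces all lanes of src2 into a scalar and combines the result with the scalar
// accumulator src1; the lane-combining sequences live in the reduce* macro-assembler helpers.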
19708 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19709 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19710 match(Set dst (AddReductionVI src1 src2));
19711 match(Set dst (MulReductionVI src1 src2));
19712 match(Set dst (AndReductionV src1 src2));
19713 match(Set dst ( OrReductionV src1 src2));
19714 match(Set dst (XorReductionV src1 src2));
19715 match(Set dst (MinReductionV src1 src2));
19716 match(Set dst (MaxReductionV src1 src2));
19717 match(Set dst (UMinReductionV src1 src2));
19718 match(Set dst (UMaxReductionV src1 src2));
19719 effect(TEMP vtmp1, TEMP vtmp2);
19720 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19721 ins_encode %{
19722 int opcode = this->ideal_Opcode();
19723 int vlen = Matcher::vector_length(this, $src2);
19724 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19725 %}
19726 ins_pipe( pipe_slow );
19727 %}
19728
19729 // =======================Long Reduction==========================================
19730
19731 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19732 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19733 match(Set dst (AddReductionVL src1 src2));
19734 match(Set dst (MulReductionVL src1 src2));
19735 match(Set dst (AndReductionV src1 src2));
19736 match(Set dst ( OrReductionV src1 src2));
19737 match(Set dst (XorReductionV src1 src2));
19738 match(Set dst (MinReductionV src1 src2));
19739 match(Set dst (MaxReductionV src1 src2));
19740 match(Set dst (UMinReductionV src1 src2));
19741 match(Set dst (UMaxReductionV src1 src2));
19742 effect(TEMP vtmp1, TEMP vtmp2);
19743 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19744 ins_encode %{
19745 int opcode = this->ideal_Opcode();
19746 int vlen = Matcher::vector_length(this, $src2);
19747 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19748 %}
19749 ins_pipe( pipe_slow );
19750 %}
19751
19752 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19753 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19754 match(Set dst (AddReductionVL src1 src2));
19755 match(Set dst (MulReductionVL src1 src2));
19756 match(Set dst (AndReductionV src1 src2));
19757 match(Set dst ( OrReductionV src1 src2));
19758 match(Set dst (XorReductionV src1 src2));
19759 match(Set dst (MinReductionV src1 src2));
19760 match(Set dst (MaxReductionV src1 src2));
19761 match(Set dst (UMinReductionV src1 src2));
19762 match(Set dst (UMaxReductionV src1 src2));
19763 effect(TEMP vtmp1, TEMP vtmp2);
19764 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19765 ins_encode %{
19766 int opcode = this->ideal_Opcode();
19767 int vlen = Matcher::vector_length(this, $src2);
19768 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19769 %}
19770 ins_pipe( pipe_slow );
19771 %}
19772
19773 // =======================Float Reduction==========================================
19774
19775 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19776 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19777 match(Set dst (AddReductionVF dst src));
19778 match(Set dst (MulReductionVF dst src));
19779 effect(TEMP dst, TEMP vtmp);
19780 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19781 ins_encode %{
19782 int opcode = this->ideal_Opcode();
19783 int vlen = Matcher::vector_length(this, $src);
19784 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19785 %}
19786 ins_pipe( pipe_slow );
19787 %}
19788
19789 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19790 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19791 match(Set dst (AddReductionVF dst src));
19792 match(Set dst (MulReductionVF dst src));
19793 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19794 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19795 ins_encode %{
19796 int opcode = this->ideal_Opcode();
19797 int vlen = Matcher::vector_length(this, $src);
19798 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19799 %}
19800 ins_pipe( pipe_slow );
19801 %}
19802
19803 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19804 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19805 match(Set dst (AddReductionVF dst src));
19806 match(Set dst (MulReductionVF dst src));
19807 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19808 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19809 ins_encode %{
19810 int opcode = this->ideal_Opcode();
19811 int vlen = Matcher::vector_length(this, $src);
19812 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19813 %}
19814 ins_pipe( pipe_slow );
19815 %}
19816
19817
19818 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19819 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19820 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19821 // src1 contains reduction identity
19822 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19823 match(Set dst (AddReductionVF src1 src2));
19824 match(Set dst (MulReductionVF src1 src2));
19825 effect(TEMP dst);
19826 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19827 ins_encode %{
19828 int opcode = this->ideal_Opcode();
19829 int vlen = Matcher::vector_length(this, $src2);
19830 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19831 %}
19832 ins_pipe( pipe_slow );
19833 %}
19834
19835 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19836 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19837 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19838 // src1 contains reduction identity
19839 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19840 match(Set dst (AddReductionVF src1 src2));
19841 match(Set dst (MulReductionVF src1 src2));
19842 effect(TEMP dst, TEMP vtmp);
19843 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19844 ins_encode %{
19845 int opcode = this->ideal_Opcode();
19846 int vlen = Matcher::vector_length(this, $src2);
19847 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19848 %}
19849 ins_pipe( pipe_slow );
19850 %}
19851
19852 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19853 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19854 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19855 // src1 contains reduction identity
19856 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19857 match(Set dst (AddReductionVF src1 src2));
19858 match(Set dst (MulReductionVF src1 src2));
19859 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19860 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19861 ins_encode %{
19862 int opcode = this->ideal_Opcode();
19863 int vlen = Matcher::vector_length(this, $src2);
19864 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19865 %}
19866 ins_pipe( pipe_slow );
19867 %}
19868
19869 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19870 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19871 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19872 // src1 contains reduction identity
19873 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19874 match(Set dst (AddReductionVF src1 src2));
19875 match(Set dst (MulReductionVF src1 src2));
19876 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19877 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19878 ins_encode %{
19879 int opcode = this->ideal_Opcode();
19880 int vlen = Matcher::vector_length(this, $src2);
19881 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19882 %}
19883 ins_pipe( pipe_slow );
19884 %}
19885
19886 // =======================Double Reduction==========================================
19887
19888 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19889 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19890 match(Set dst (AddReductionVD dst src));
19891 match(Set dst (MulReductionVD dst src));
19892 effect(TEMP dst, TEMP vtmp);
19893 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19894 ins_encode %{
19895 int opcode = this->ideal_Opcode();
19896 int vlen = Matcher::vector_length(this, $src);
19897 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19898 %}
19899 ins_pipe( pipe_slow );
19900 %}
19901
19902 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19903 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19904 match(Set dst (AddReductionVD dst src));
19905 match(Set dst (MulReductionVD dst src));
19906 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19907 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19908 ins_encode %{
19909 int opcode = this->ideal_Opcode();
19910 int vlen = Matcher::vector_length(this, $src);
19911 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19912 %}
19913 ins_pipe( pipe_slow );
19914 %}
19915
19916 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19917 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19918 match(Set dst (AddReductionVD dst src));
19919 match(Set dst (MulReductionVD dst src));
19920 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19921 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19922 ins_encode %{
19923 int opcode = this->ideal_Opcode();
19924 int vlen = Matcher::vector_length(this, $src);
19925 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19926 %}
19927 ins_pipe( pipe_slow );
19928 %}
19929
19930 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19931 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19932 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19933 // src1 contains reduction identity
19934 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19935 match(Set dst (AddReductionVD src1 src2));
19936 match(Set dst (MulReductionVD src1 src2));
19937 effect(TEMP dst);
19938 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19939 ins_encode %{
19940 int opcode = this->ideal_Opcode();
19941 int vlen = Matcher::vector_length(this, $src2);
19942 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19943 %}
19944 ins_pipe( pipe_slow );
19945 %}
19946
19947 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19948 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19949 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19950 // src1 contains reduction identity
19951 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19952 match(Set dst (AddReductionVD src1 src2));
19953 match(Set dst (MulReductionVD src1 src2));
19954 effect(TEMP dst, TEMP vtmp);
19955 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19956 ins_encode %{
19957 int opcode = this->ideal_Opcode();
19958 int vlen = Matcher::vector_length(this, $src2);
19959 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19960 %}
19961 ins_pipe( pipe_slow );
19962 %}
19963
19964 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19965 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19966 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19967 // src1 contains reduction identity
19968 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19969 match(Set dst (AddReductionVD src1 src2));
19970 match(Set dst (MulReductionVD src1 src2));
19971 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19972 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19973 ins_encode %{
19974 int opcode = this->ideal_Opcode();
19975 int vlen = Matcher::vector_length(this, $src2);
19976 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19977 %}
19978 ins_pipe( pipe_slow );
19979 %}
19980
19981 // =======================Byte Reduction==========================================
19982
19983 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19984 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19985 match(Set dst (AddReductionVI src1 src2));
19986 match(Set dst (AndReductionV src1 src2));
19987 match(Set dst ( OrReductionV src1 src2));
19988 match(Set dst (XorReductionV src1 src2));
19989 match(Set dst (MinReductionV src1 src2));
19990 match(Set dst (MaxReductionV src1 src2));
19991 match(Set dst (UMinReductionV src1 src2));
19992 match(Set dst (UMaxReductionV src1 src2));
19993 effect(TEMP vtmp1, TEMP vtmp2);
19994 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19995 ins_encode %{
19996 int opcode = this->ideal_Opcode();
19997 int vlen = Matcher::vector_length(this, $src2);
19998 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19999 %}
20000 ins_pipe( pipe_slow );
20001 %}
20002
20003 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20004 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
20005 match(Set dst (AddReductionVI src1 src2));
20006 match(Set dst (AndReductionV src1 src2));
20007 match(Set dst ( OrReductionV src1 src2));
20008 match(Set dst (XorReductionV src1 src2));
20009 match(Set dst (MinReductionV src1 src2));
20010 match(Set dst (MaxReductionV src1 src2));
20011 match(Set dst (UMinReductionV src1 src2));
20012 match(Set dst (UMaxReductionV src1 src2));
20013 effect(TEMP vtmp1, TEMP vtmp2);
20014 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20015 ins_encode %{
20016 int opcode = this->ideal_Opcode();
20017 int vlen = Matcher::vector_length(this, $src2);
20018 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20019 %}
20020 ins_pipe( pipe_slow );
20021 %}
20022
20023 // =======================Short Reduction==========================================
20024
20025 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20026 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
20027 match(Set dst (AddReductionVI src1 src2));
20028 match(Set dst (MulReductionVI src1 src2));
20029 match(Set dst (AndReductionV src1 src2));
20030 match(Set dst ( OrReductionV src1 src2));
20031 match(Set dst (XorReductionV src1 src2));
20032 match(Set dst (MinReductionV src1 src2));
20033 match(Set dst (MaxReductionV src1 src2));
20034 match(Set dst (UMinReductionV src1 src2));
20035 match(Set dst (UMaxReductionV src1 src2));
20036 effect(TEMP vtmp1, TEMP vtmp2);
20037 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20038 ins_encode %{
20039 int opcode = this->ideal_Opcode();
20040 int vlen = Matcher::vector_length(this, $src2);
20041 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20042 %}
20043 ins_pipe( pipe_slow );
20044 %}
20045
// =======================Byte Mul Reduction=====================================
20047
20048 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20049 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20050 Matcher::vector_length(n->in(2)) <= 32); // src2
20051 match(Set dst (MulReductionVI src1 src2));
20052 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20053 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20054 ins_encode %{
20055 int opcode = this->ideal_Opcode();
20056 int vlen = Matcher::vector_length(this, $src2);
20057 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20058 %}
20059 ins_pipe( pipe_slow );
20060 %}
20061
20062 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20063 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20064 Matcher::vector_length(n->in(2)) == 64); // src2
20065 match(Set dst (MulReductionVI src1 src2));
20066 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20067 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20068 ins_encode %{
20069 int opcode = this->ideal_Opcode();
20070 int vlen = Matcher::vector_length(this, $src2);
20071 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20072 %}
20073 ins_pipe( pipe_slow );
20074 %}
20075
20076 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
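// In the rules below src1 is an immediate that the predicate requires to be the
// reduction identity (+Inf for MIN, -Inf for MAX), so the generated code can
// ignore src1 and reduce the lanes of src2 alone.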
20078 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20079 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20080 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20081 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20082 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20083 Matcher::vector_length(n->in(2)) == 2);
20084 match(Set dst (MinReductionV src1 src2));
20085 match(Set dst (MaxReductionV src1 src2));
20086 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20087 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20088 ins_encode %{
20089 assert(UseAVX > 0, "sanity");
20090
20091 int opcode = this->ideal_Opcode();
20092 int vlen = Matcher::vector_length(this, $src2);
20093 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20094 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20095 %}
20096 ins_pipe( pipe_slow );
20097 %}
20098
20099 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20100 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20101 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20102 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20103 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20104 Matcher::vector_length(n->in(2)) >= 4);
20105 match(Set dst (MinReductionV src1 src2));
20106 match(Set dst (MaxReductionV src1 src2));
20107 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20108 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20109 ins_encode %{
20110 assert(UseAVX > 0, "sanity");
20111
20112 int opcode = this->ideal_Opcode();
20113 int vlen = Matcher::vector_length(this, $src2);
20114 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20115 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20116 %}
20117 ins_pipe( pipe_slow );
20118 %}
20119
20120 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
20121 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20122 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20123 Matcher::vector_length(n->in(2)) == 2);
20124 match(Set dst (MinReductionV dst src));
20125 match(Set dst (MaxReductionV dst src));
20126 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20127 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20128 ins_encode %{
20129 assert(UseAVX > 0, "sanity");
20130
20131 int opcode = this->ideal_Opcode();
20132 int vlen = Matcher::vector_length(this, $src);
20133 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20134 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20135 %}
20136 ins_pipe( pipe_slow );
20137 %}
20138
20139
20140 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
20141 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20142 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20143 Matcher::vector_length(n->in(2)) >= 4);
20144 match(Set dst (MinReductionV dst src));
20145 match(Set dst (MaxReductionV dst src));
20146 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20147 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20148 ins_encode %{
20149 assert(UseAVX > 0, "sanity");
20150
20151 int opcode = this->ideal_Opcode();
20152 int vlen = Matcher::vector_length(this, $src);
20153 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20154 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20155 %}
20156 ins_pipe( pipe_slow );
20157 %}
20158
20159 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
20160 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20161 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20162 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20163 Matcher::vector_length(n->in(2)) == 2);
20164 match(Set dst (MinReductionV src1 src2));
20165 match(Set dst (MaxReductionV src1 src2));
20166 effect(TEMP dst, TEMP xtmp1);
20167 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
20168 ins_encode %{
20169 int opcode = this->ideal_Opcode();
20170 int vlen = Matcher::vector_length(this, $src2);
20171 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20172 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20173 %}
20174 ins_pipe( pipe_slow );
20175 %}
20176
20177 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
20178 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20179 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20180 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20181 Matcher::vector_length(n->in(2)) >= 4);
20182 match(Set dst (MinReductionV src1 src2));
20183 match(Set dst (MaxReductionV src1 src2));
20184 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20185 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20186 ins_encode %{
20187 int opcode = this->ideal_Opcode();
20188 int vlen = Matcher::vector_length(this, $src2);
20189 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20190 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20191 %}
20192 ins_pipe( pipe_slow );
20193 %}
20194
20195 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20196 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20197 Matcher::vector_length(n->in(2)) == 2);
20198 match(Set dst (MinReductionV dst src));
20199 match(Set dst (MaxReductionV dst src));
20200 effect(TEMP dst, TEMP xtmp1);
20201 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20202 ins_encode %{
20203 int opcode = this->ideal_Opcode();
20204 int vlen = Matcher::vector_length(this, $src);
20205 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20206 $xtmp1$$XMMRegister);
20207 %}
20208 ins_pipe( pipe_slow );
20209 %}
20210
20211 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20212 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20213 Matcher::vector_length(n->in(2)) >= 4);
20214 match(Set dst (MinReductionV dst src));
20215 match(Set dst (MaxReductionV dst src));
20216 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20218 ins_encode %{
20219 int opcode = this->ideal_Opcode();
20220 int vlen = Matcher::vector_length(this, $src);
20221 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20222 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20223 %}
20224 ins_pipe( pipe_slow );
20225 %}
20226
//--------------------Min/Max Double Reduction --------------------
20228 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20229 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20230 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20231 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20232 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20233 Matcher::vector_length(n->in(2)) == 2);
20234 match(Set dst (MinReductionV src1 src2));
20235 match(Set dst (MaxReductionV src1 src2));
20236 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20237 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20238 ins_encode %{
20239 assert(UseAVX > 0, "sanity");
20240
20241 int opcode = this->ideal_Opcode();
20242 int vlen = Matcher::vector_length(this, $src2);
20243 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20244 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20245 %}
20246 ins_pipe( pipe_slow );
20247 %}
20248
20249 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20250 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20251 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20252 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20253 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20254 Matcher::vector_length(n->in(2)) >= 4);
20255 match(Set dst (MinReductionV src1 src2));
20256 match(Set dst (MaxReductionV src1 src2));
20257 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20258 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20259 ins_encode %{
20260 assert(UseAVX > 0, "sanity");
20261
20262 int opcode = this->ideal_Opcode();
20263 int vlen = Matcher::vector_length(this, $src2);
20264 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20265 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20266 %}
20267 ins_pipe( pipe_slow );
20268 %}
20269
20270
20271 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20272 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20273 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20274 Matcher::vector_length(n->in(2)) == 2);
20275 match(Set dst (MinReductionV dst src));
20276 match(Set dst (MaxReductionV dst src));
20277 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20278 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20279 ins_encode %{
20280 assert(UseAVX > 0, "sanity");
20281
20282 int opcode = this->ideal_Opcode();
20283 int vlen = Matcher::vector_length(this, $src);
20284 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20285 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20286 %}
20287 ins_pipe( pipe_slow );
20288 %}
20289
20290 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20291 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20292 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20293 Matcher::vector_length(n->in(2)) >= 4);
20294 match(Set dst (MinReductionV dst src));
20295 match(Set dst (MaxReductionV dst src));
20296 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20297 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20298 ins_encode %{
20299 assert(UseAVX > 0, "sanity");
20300
20301 int opcode = this->ideal_Opcode();
20302 int vlen = Matcher::vector_length(this, $src);
20303 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20304 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20305 %}
20306 ins_pipe( pipe_slow );
20307 %}
20308
20309 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20310 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20311 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20312 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20313 Matcher::vector_length(n->in(2)) == 2);
20314 match(Set dst (MinReductionV src1 src2));
20315 match(Set dst (MaxReductionV src1 src2));
20316 effect(TEMP dst, TEMP xtmp1);
20317 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20318 ins_encode %{
20319 int opcode = this->ideal_Opcode();
20320 int vlen = Matcher::vector_length(this, $src2);
20321 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20322 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20323 %}
20324 ins_pipe( pipe_slow );
20325 %}
20326
20327 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20328 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20329 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20330 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20331 Matcher::vector_length(n->in(2)) >= 4);
20332 match(Set dst (MinReductionV src1 src2));
20333 match(Set dst (MaxReductionV src1 src2));
20334 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20335 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20336 ins_encode %{
20337 int opcode = this->ideal_Opcode();
20338 int vlen = Matcher::vector_length(this, $src2);
20339 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20340 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20341 %}
20342 ins_pipe( pipe_slow );
20343 %}
20344
20345
20346 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20347 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20348 Matcher::vector_length(n->in(2)) == 2);
20349 match(Set dst (MinReductionV dst src));
20350 match(Set dst (MaxReductionV dst src));
20351 effect(TEMP dst, TEMP xtmp1);
20352 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20353 ins_encode %{
20354 int opcode = this->ideal_Opcode();
20355 int vlen = Matcher::vector_length(this, $src);
20356 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20357 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20358 %}
20359 ins_pipe( pipe_slow );
20360 %}
20361
20362 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20363 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20364 Matcher::vector_length(n->in(2)) >= 4);
20365 match(Set dst (MinReductionV dst src));
20366 match(Set dst (MaxReductionV dst src));
20367 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20368 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20369 ins_encode %{
20370 int opcode = this->ideal_Opcode();
20371 int vlen = Matcher::vector_length(this, $src);
20372 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20373 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20374 %}
20375 ins_pipe( pipe_slow );
20376 %}
20377
20378 // ====================VECTOR ARITHMETIC=======================================
20379
20380 // --------------------------------- ADD --------------------------------------
20381
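// Each element type gets three flavors of rule: a destructive two-operand SSE form
// (UseAVX == 0), a three-operand AVX register form, and an AVX form with a memory
// operand. The memory forms require a vector wider than 8 bytes so that the
// full-width memory operand is not applied to a load narrower than the access it
// performs. The same pattern repeats for the SUB, MUL and DIV rules further below.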
20382 // Bytes vector add
20383 instruct vaddB(vec dst, vec src) %{
20384 predicate(UseAVX == 0);
20385 match(Set dst (AddVB dst src));
20386 format %{ "paddb $dst,$src\t! add packedB" %}
20387 ins_encode %{
20388 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20389 %}
20390 ins_pipe( pipe_slow );
20391 %}
20392
20393 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20394 predicate(UseAVX > 0);
20395 match(Set dst (AddVB src1 src2));
20396 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20397 ins_encode %{
20398 int vlen_enc = vector_length_encoding(this);
20399 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20400 %}
20401 ins_pipe( pipe_slow );
20402 %}
20403
20404 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20405 predicate((UseAVX > 0) &&
20406 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20407 match(Set dst (AddVB src (LoadVector mem)));
20408 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20409 ins_encode %{
20410 int vlen_enc = vector_length_encoding(this);
20411 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20412 %}
20413 ins_pipe( pipe_slow );
20414 %}
20415
20416 // Shorts/Chars vector add
20417 instruct vaddS(vec dst, vec src) %{
20418 predicate(UseAVX == 0);
20419 match(Set dst (AddVS dst src));
20420 format %{ "paddw $dst,$src\t! add packedS" %}
20421 ins_encode %{
20422 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20423 %}
20424 ins_pipe( pipe_slow );
20425 %}
20426
20427 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20428 predicate(UseAVX > 0);
20429 match(Set dst (AddVS src1 src2));
20430 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20431 ins_encode %{
20432 int vlen_enc = vector_length_encoding(this);
20433 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20434 %}
20435 ins_pipe( pipe_slow );
20436 %}
20437
20438 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20439 predicate((UseAVX > 0) &&
20440 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20441 match(Set dst (AddVS src (LoadVector mem)));
20442 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20443 ins_encode %{
20444 int vlen_enc = vector_length_encoding(this);
20445 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20446 %}
20447 ins_pipe( pipe_slow );
20448 %}
20449
20450 // Integers vector add
20451 instruct vaddI(vec dst, vec src) %{
20452 predicate(UseAVX == 0);
20453 match(Set dst (AddVI dst src));
20454 format %{ "paddd $dst,$src\t! add packedI" %}
20455 ins_encode %{
20456 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20457 %}
20458 ins_pipe( pipe_slow );
20459 %}
20460
20461 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20462 predicate(UseAVX > 0);
20463 match(Set dst (AddVI src1 src2));
20464 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20465 ins_encode %{
20466 int vlen_enc = vector_length_encoding(this);
20467 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20468 %}
20469 ins_pipe( pipe_slow );
20470 %}
20471
20472
20473 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20474 predicate((UseAVX > 0) &&
20475 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20476 match(Set dst (AddVI src (LoadVector mem)));
20477 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20478 ins_encode %{
20479 int vlen_enc = vector_length_encoding(this);
20480 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20481 %}
20482 ins_pipe( pipe_slow );
20483 %}
20484
20485 // Longs vector add
20486 instruct vaddL(vec dst, vec src) %{
20487 predicate(UseAVX == 0);
20488 match(Set dst (AddVL dst src));
20489 format %{ "paddq $dst,$src\t! add packedL" %}
20490 ins_encode %{
20491 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20492 %}
20493 ins_pipe( pipe_slow );
20494 %}
20495
20496 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20497 predicate(UseAVX > 0);
20498 match(Set dst (AddVL src1 src2));
20499 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20500 ins_encode %{
20501 int vlen_enc = vector_length_encoding(this);
20502 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20503 %}
20504 ins_pipe( pipe_slow );
20505 %}
20506
20507 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20508 predicate((UseAVX > 0) &&
20509 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20510 match(Set dst (AddVL src (LoadVector mem)));
20511 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20512 ins_encode %{
20513 int vlen_enc = vector_length_encoding(this);
20514 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20515 %}
20516 ins_pipe( pipe_slow );
20517 %}
20518
20519 // Floats vector add
20520 instruct vaddF(vec dst, vec src) %{
20521 predicate(UseAVX == 0);
20522 match(Set dst (AddVF dst src));
20523 format %{ "addps $dst,$src\t! add packedF" %}
20524 ins_encode %{
20525 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20526 %}
20527 ins_pipe( pipe_slow );
20528 %}
20529
20530 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20531 predicate(UseAVX > 0);
20532 match(Set dst (AddVF src1 src2));
20533 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20534 ins_encode %{
20535 int vlen_enc = vector_length_encoding(this);
20536 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20537 %}
20538 ins_pipe( pipe_slow );
20539 %}
20540
20541 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20542 predicate((UseAVX > 0) &&
20543 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20544 match(Set dst (AddVF src (LoadVector mem)));
20545 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20546 ins_encode %{
20547 int vlen_enc = vector_length_encoding(this);
20548 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20549 %}
20550 ins_pipe( pipe_slow );
20551 %}
20552
20553 // Doubles vector add
20554 instruct vaddD(vec dst, vec src) %{
20555 predicate(UseAVX == 0);
20556 match(Set dst (AddVD dst src));
20557 format %{ "addpd $dst,$src\t! add packedD" %}
20558 ins_encode %{
20559 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20560 %}
20561 ins_pipe( pipe_slow );
20562 %}
20563
20564 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20565 predicate(UseAVX > 0);
20566 match(Set dst (AddVD src1 src2));
20567 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20568 ins_encode %{
20569 int vlen_enc = vector_length_encoding(this);
20570 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20571 %}
20572 ins_pipe( pipe_slow );
20573 %}
20574
20575 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20576 predicate((UseAVX > 0) &&
20577 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20578 match(Set dst (AddVD src (LoadVector mem)));
20579 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20580 ins_encode %{
20581 int vlen_enc = vector_length_encoding(this);
20582 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20583 %}
20584 ins_pipe( pipe_slow );
20585 %}
20586
20587 // --------------------------------- SUB --------------------------------------
20588
20589 // Bytes vector sub
20590 instruct vsubB(vec dst, vec src) %{
20591 predicate(UseAVX == 0);
20592 match(Set dst (SubVB dst src));
20593 format %{ "psubb $dst,$src\t! sub packedB" %}
20594 ins_encode %{
20595 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20596 %}
20597 ins_pipe( pipe_slow );
20598 %}
20599
20600 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20601 predicate(UseAVX > 0);
20602 match(Set dst (SubVB src1 src2));
20603 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20604 ins_encode %{
20605 int vlen_enc = vector_length_encoding(this);
20606 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20607 %}
20608 ins_pipe( pipe_slow );
20609 %}
20610
20611 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20612 predicate((UseAVX > 0) &&
20613 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20614 match(Set dst (SubVB src (LoadVector mem)));
20615 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20616 ins_encode %{
20617 int vlen_enc = vector_length_encoding(this);
20618 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20619 %}
20620 ins_pipe( pipe_slow );
20621 %}
20622
20623 // Shorts/Chars vector sub
20624 instruct vsubS(vec dst, vec src) %{
20625 predicate(UseAVX == 0);
20626 match(Set dst (SubVS dst src));
20627 format %{ "psubw $dst,$src\t! sub packedS" %}
20628 ins_encode %{
20629 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20630 %}
20631 ins_pipe( pipe_slow );
20632 %}
20633
20634
20635 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20636 predicate(UseAVX > 0);
20637 match(Set dst (SubVS src1 src2));
20638 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20639 ins_encode %{
20640 int vlen_enc = vector_length_encoding(this);
20641 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20642 %}
20643 ins_pipe( pipe_slow );
20644 %}
20645
20646 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20647 predicate((UseAVX > 0) &&
20648 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20649 match(Set dst (SubVS src (LoadVector mem)));
20650 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20651 ins_encode %{
20652 int vlen_enc = vector_length_encoding(this);
20653 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20654 %}
20655 ins_pipe( pipe_slow );
20656 %}
20657
20658 // Integers vector sub
20659 instruct vsubI(vec dst, vec src) %{
20660 predicate(UseAVX == 0);
20661 match(Set dst (SubVI dst src));
20662 format %{ "psubd $dst,$src\t! sub packedI" %}
20663 ins_encode %{
20664 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20665 %}
20666 ins_pipe( pipe_slow );
20667 %}
20668
20669 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20670 predicate(UseAVX > 0);
20671 match(Set dst (SubVI src1 src2));
20672 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20673 ins_encode %{
20674 int vlen_enc = vector_length_encoding(this);
20675 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20676 %}
20677 ins_pipe( pipe_slow );
20678 %}
20679
20680 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20681 predicate((UseAVX > 0) &&
20682 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20683 match(Set dst (SubVI src (LoadVector mem)));
20684 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20685 ins_encode %{
20686 int vlen_enc = vector_length_encoding(this);
20687 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20688 %}
20689 ins_pipe( pipe_slow );
20690 %}
20691
20692 // Longs vector sub
20693 instruct vsubL(vec dst, vec src) %{
20694 predicate(UseAVX == 0);
20695 match(Set dst (SubVL dst src));
20696 format %{ "psubq $dst,$src\t! sub packedL" %}
20697 ins_encode %{
20698 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20699 %}
20700 ins_pipe( pipe_slow );
20701 %}
20702
20703 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20704 predicate(UseAVX > 0);
20705 match(Set dst (SubVL src1 src2));
20706 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20707 ins_encode %{
20708 int vlen_enc = vector_length_encoding(this);
20709 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20710 %}
20711 ins_pipe( pipe_slow );
20712 %}
20713
20714
20715 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20716 predicate((UseAVX > 0) &&
20717 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20718 match(Set dst (SubVL src (LoadVector mem)));
20719 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20720 ins_encode %{
20721 int vlen_enc = vector_length_encoding(this);
20722 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20723 %}
20724 ins_pipe( pipe_slow );
20725 %}
20726
20727 // Floats vector sub
20728 instruct vsubF(vec dst, vec src) %{
20729 predicate(UseAVX == 0);
20730 match(Set dst (SubVF dst src));
20731 format %{ "subps $dst,$src\t! sub packedF" %}
20732 ins_encode %{
20733 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20734 %}
20735 ins_pipe( pipe_slow );
20736 %}
20737
20738 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20739 predicate(UseAVX > 0);
20740 match(Set dst (SubVF src1 src2));
20741 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20742 ins_encode %{
20743 int vlen_enc = vector_length_encoding(this);
20744 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20745 %}
20746 ins_pipe( pipe_slow );
20747 %}
20748
20749 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20750 predicate((UseAVX > 0) &&
20751 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20752 match(Set dst (SubVF src (LoadVector mem)));
20753 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20754 ins_encode %{
20755 int vlen_enc = vector_length_encoding(this);
20756 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20757 %}
20758 ins_pipe( pipe_slow );
20759 %}
20760
20761 // Doubles vector sub
20762 instruct vsubD(vec dst, vec src) %{
20763 predicate(UseAVX == 0);
20764 match(Set dst (SubVD dst src));
20765 format %{ "subpd $dst,$src\t! sub packedD" %}
20766 ins_encode %{
20767 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20768 %}
20769 ins_pipe( pipe_slow );
20770 %}
20771
20772 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20773 predicate(UseAVX > 0);
20774 match(Set dst (SubVD src1 src2));
20775 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20776 ins_encode %{
20777 int vlen_enc = vector_length_encoding(this);
20778 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20779 %}
20780 ins_pipe( pipe_slow );
20781 %}
20782
20783 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20784 predicate((UseAVX > 0) &&
20785 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20786 match(Set dst (SubVD src (LoadVector mem)));
20787 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20788 ins_encode %{
20789 int vlen_enc = vector_length_encoding(this);
20790 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20791 %}
20792 ins_pipe( pipe_slow );
20793 %}
20794
20795 // --------------------------------- MUL --------------------------------------
20796
20797 // Byte vector mul
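// SSE/AVX has no byte-wise multiply instruction, so the rules below widen the byte
// lanes to 16-bit lanes, multiply with pmullw/vpmullw, and repack the low byte of
// each 16-bit product.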
20798 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20799 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20800 match(Set dst (MulVB src1 src2));
20801 effect(TEMP dst, TEMP xtmp);
20802 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20803 ins_encode %{
20804 assert(UseSSE > 3, "required");
20805 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20806 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20807 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20808 __ psllw($dst$$XMMRegister, 8);
20809 __ psrlw($dst$$XMMRegister, 8);
20810 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20811 %}
20812 ins_pipe( pipe_slow );
20813 %}
20814
20815 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20816 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20817 match(Set dst (MulVB src1 src2));
20818 effect(TEMP dst, TEMP xtmp);
20819 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20820 ins_encode %{
20821 assert(UseSSE > 3, "required");
20822 // Odd-index elements
20823 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20824 __ psrlw($dst$$XMMRegister, 8);
20825 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20826 __ psrlw($xtmp$$XMMRegister, 8);
20827 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20828 __ psllw($dst$$XMMRegister, 8);
20829 // Even-index elements
20830 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20831 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20832 __ psllw($xtmp$$XMMRegister, 8);
20833 __ psrlw($xtmp$$XMMRegister, 8);
20834 // Combine
20835 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20836 %}
20837 ins_pipe( pipe_slow );
20838 %}
20839
20840 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20841 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20842 match(Set dst (MulVB src1 src2));
20843 effect(TEMP xtmp1, TEMP xtmp2);
20844 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20845 ins_encode %{
20846 int vlen_enc = vector_length_encoding(this);
20847 // Odd-index elements
20848 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20849 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20850 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20851 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20852 // Even-index elements
20853 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20854 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20855 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20856 // Combine
20857 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20858 %}
20859 ins_pipe( pipe_slow );
20860 %}
20861
20862 // Shorts/Chars vector mul
20863 instruct vmulS(vec dst, vec src) %{
20864 predicate(UseAVX == 0);
20865 match(Set dst (MulVS dst src));
20866 format %{ "pmullw $dst,$src\t! mul packedS" %}
20867 ins_encode %{
20868 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20869 %}
20870 ins_pipe( pipe_slow );
20871 %}
20872
20873 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20874 predicate(UseAVX > 0);
20875 match(Set dst (MulVS src1 src2));
20876 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20877 ins_encode %{
20878 int vlen_enc = vector_length_encoding(this);
20879 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20880 %}
20881 ins_pipe( pipe_slow );
20882 %}
20883
20884 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20885 predicate((UseAVX > 0) &&
20886 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20887 match(Set dst (MulVS src (LoadVector mem)));
20888 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20889 ins_encode %{
20890 int vlen_enc = vector_length_encoding(this);
20891 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20892 %}
20893 ins_pipe( pipe_slow );
20894 %}
20895
20896 // Integers vector mul
20897 instruct vmulI(vec dst, vec src) %{
20898 predicate(UseAVX == 0);
20899 match(Set dst (MulVI dst src));
20900 format %{ "pmulld $dst,$src\t! mul packedI" %}
20901 ins_encode %{
20902 assert(UseSSE > 3, "required");
20903 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20904 %}
20905 ins_pipe( pipe_slow );
20906 %}
20907
20908 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20909 predicate(UseAVX > 0);
20910 match(Set dst (MulVI src1 src2));
20911 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20912 ins_encode %{
20913 int vlen_enc = vector_length_encoding(this);
20914 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20915 %}
20916 ins_pipe( pipe_slow );
20917 %}
20918
20919 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20920 predicate((UseAVX > 0) &&
20921 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20922 match(Set dst (MulVI src (LoadVector mem)));
20923 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20924 ins_encode %{
20925 int vlen_enc = vector_length_encoding(this);
20926 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20927 %}
20928 ins_pipe( pipe_slow );
20929 %}
20930
20931 // Longs vector mul
20932 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20933 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20934 VM_Version::supports_avx512dq()) ||
20935 VM_Version::supports_avx512vldq());
20936 match(Set dst (MulVL src1 src2));
20937 ins_cost(500);
20938 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20939 ins_encode %{
20940 assert(UseAVX > 2, "required");
20941 int vlen_enc = vector_length_encoding(this);
20942 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20943 %}
20944 ins_pipe( pipe_slow );
20945 %}
20946
20947 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20948 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20949 VM_Version::supports_avx512dq()) ||
20950 (Matcher::vector_length_in_bytes(n) > 8 &&
20951 VM_Version::supports_avx512vldq()));
20952 match(Set dst (MulVL src (LoadVector mem)));
20953 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20954 ins_cost(500);
20955 ins_encode %{
20956 assert(UseAVX > 2, "required");
20957 int vlen_enc = vector_length_encoding(this);
20958 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20959 %}
20960 ins_pipe( pipe_slow );
20961 %}
20962
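// Without a packed 64-bit multiply instruction the product is assembled from
// 32-bit halves. With a = a_hi*2^32 + a_lo and b = b_hi*2^32 + b_lo:
//   a*b mod 2^64 == a_lo*b_lo + (((a_lo*b_hi + a_hi*b_lo) mod 2^32) << 32)
// The rules below form the two cross products with a 32-bit multiply, add them,
// shift the sum into the upper dword of each lane, and add the full 64-bit
// lo*lo product produced by pmuludq.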
20963 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20964 predicate(UseAVX == 0);
20965 match(Set dst (MulVL src1 src2));
20966 ins_cost(500);
20967 effect(TEMP dst, TEMP xtmp);
20968 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20969 ins_encode %{
20970 assert(VM_Version::supports_sse4_1(), "required");
// Get the lo*hi cross products; only their lower 32 bits are needed
20972 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20973 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20974 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20975 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20976 __ psllq($dst$$XMMRegister, 32);
20977 // Get the lo-lo products
20978 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20979 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20980 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20981 %}
20982 ins_pipe( pipe_slow );
20983 %}
20984
20985 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20986 predicate(UseAVX > 0 &&
20987 ((Matcher::vector_length_in_bytes(n) == 64 &&
20988 !VM_Version::supports_avx512dq()) ||
20989 (Matcher::vector_length_in_bytes(n) < 64 &&
20990 !VM_Version::supports_avx512vldq())));
20991 match(Set dst (MulVL src1 src2));
20992 effect(TEMP xtmp1, TEMP xtmp2);
20993 ins_cost(500);
20994 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20995 ins_encode %{
20996 int vlen_enc = vector_length_encoding(this);
// Get the lo*hi cross products; only their lower 32 bits are needed
20998 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20999 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
21000 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
21001 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
21002 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
21003 // Get the lo-lo products
21004 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21005 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21006 %}
21007 ins_pipe( pipe_slow );
21008 %}
21009
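// If C2 has proven that both inputs are zero- or sign-extended 32-bit values
// (has_uint_inputs / has_int_inputs), a single vpmuludq / vpmuldq already yields
// the exact 64-bit product, so the two rules below are given a much lower cost
// than the general decomposition above.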
21010 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
21011 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
21012 match(Set dst (MulVL src1 src2));
21013 ins_cost(100);
21014 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
21015 ins_encode %{
21016 int vlen_enc = vector_length_encoding(this);
21017 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21018 %}
21019 ins_pipe( pipe_slow );
21020 %}
21021
21022 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
21023 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
21024 match(Set dst (MulVL src1 src2));
21025 ins_cost(100);
21026 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
21027 ins_encode %{
21028 int vlen_enc = vector_length_encoding(this);
21029 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21030 %}
21031 ins_pipe( pipe_slow );
21032 %}
21033
21034 // Floats vector mul
21035 instruct vmulF(vec dst, vec src) %{
21036 predicate(UseAVX == 0);
21037 match(Set dst (MulVF dst src));
21038 format %{ "mulps $dst,$src\t! mul packedF" %}
21039 ins_encode %{
21040 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
21041 %}
21042 ins_pipe( pipe_slow );
21043 %}
21044
21045 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
21046 predicate(UseAVX > 0);
21047 match(Set dst (MulVF src1 src2));
21048 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
21049 ins_encode %{
21050 int vlen_enc = vector_length_encoding(this);
21051 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21052 %}
21053 ins_pipe( pipe_slow );
21054 %}
21055
21056 instruct vmulF_mem(vec dst, vec src, memory mem) %{
21057 predicate((UseAVX > 0) &&
21058 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21059 match(Set dst (MulVF src (LoadVector mem)));
21060 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
21061 ins_encode %{
21062 int vlen_enc = vector_length_encoding(this);
21063 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21064 %}
21065 ins_pipe( pipe_slow );
21066 %}
21067
21068 // Doubles vector mul
21069 instruct vmulD(vec dst, vec src) %{
21070 predicate(UseAVX == 0);
21071 match(Set dst (MulVD dst src));
21072 format %{ "mulpd $dst,$src\t! mul packedD" %}
21073 ins_encode %{
21074 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
21075 %}
21076 ins_pipe( pipe_slow );
21077 %}
21078
21079 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
21080 predicate(UseAVX > 0);
21081 match(Set dst (MulVD src1 src2));
21082 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
21083 ins_encode %{
21084 int vlen_enc = vector_length_encoding(this);
21085 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21086 %}
21087 ins_pipe( pipe_slow );
21088 %}
21089
21090 instruct vmulD_mem(vec dst, vec src, memory mem) %{
21091 predicate((UseAVX > 0) &&
21092 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21093 match(Set dst (MulVD src (LoadVector mem)));
21094 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
21095 ins_encode %{
21096 int vlen_enc = vector_length_encoding(this);
21097 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21098 %}
21099 ins_pipe( pipe_slow );
21100 %}
21101
21102 // --------------------------------- DIV --------------------------------------
21103
21104 // Floats vector div
21105 instruct vdivF(vec dst, vec src) %{
21106 predicate(UseAVX == 0);
21107 match(Set dst (DivVF dst src));
21108 format %{ "divps $dst,$src\t! div packedF" %}
21109 ins_encode %{
21110 __ divps($dst$$XMMRegister, $src$$XMMRegister);
21111 %}
21112 ins_pipe( pipe_slow );
21113 %}
21114
21115 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
21116 predicate(UseAVX > 0);
21117 match(Set dst (DivVF src1 src2));
21118 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
21119 ins_encode %{
21120 int vlen_enc = vector_length_encoding(this);
21121 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21122 %}
21123 ins_pipe( pipe_slow );
21124 %}
21125
21126 instruct vdivF_mem(vec dst, vec src, memory mem) %{
21127 predicate((UseAVX > 0) &&
21128 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21129 match(Set dst (DivVF src (LoadVector mem)));
21130 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
21131 ins_encode %{
21132 int vlen_enc = vector_length_encoding(this);
21133 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21134 %}
21135 ins_pipe( pipe_slow );
21136 %}
21137
21138 // Doubles vector div
21139 instruct vdivD(vec dst, vec src) %{
21140 predicate(UseAVX == 0);
21141 match(Set dst (DivVD dst src));
21142 format %{ "divpd $dst,$src\t! div packedD" %}
21143 ins_encode %{
21144 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
21145 %}
21146 ins_pipe( pipe_slow );
21147 %}
21148
21149 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
21150 predicate(UseAVX > 0);
21151 match(Set dst (DivVD src1 src2));
21152 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
21153 ins_encode %{
21154 int vlen_enc = vector_length_encoding(this);
21155 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21156 %}
21157 ins_pipe( pipe_slow );
21158 %}
21159
21160 instruct vdivD_mem(vec dst, vec src, memory mem) %{
21161 predicate((UseAVX > 0) &&
21162 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21163 match(Set dst (DivVD src (LoadVector mem)));
21164 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
21165 ins_encode %{
21166 int vlen_enc = vector_length_encoding(this);
21167 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21168 %}
21169 ins_pipe( pipe_slow );
21170 %}
21171
21172 // ------------------------------ MinMax ---------------------------------------
21173
21174 // Byte, Short, Int vector Min/Max
21175 instruct minmax_reg_sse(vec dst, vec src) %{
21176 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21177 UseAVX == 0);
21178 match(Set dst (MinV dst src));
21179 match(Set dst (MaxV dst src));
21180 format %{ "vector_minmax $dst,$src\t! " %}
21181 ins_encode %{
21182 assert(UseSSE >= 4, "required");
21183
21184 int opcode = this->ideal_Opcode();
21185 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21186 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21187 %}
21188 ins_pipe( pipe_slow );
21189 %}
21190
21191 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21192 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21193 UseAVX > 0);
21194 match(Set dst (MinV src1 src2));
21195 match(Set dst (MaxV src1 src2));
21196 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
21197 ins_encode %{
21198 int opcode = this->ideal_Opcode();
21199 int vlen_enc = vector_length_encoding(this);
21200 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21201
21202 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21203 %}
21204 ins_pipe( pipe_slow );
21205 %}
21206
21207 // Long vector Min/Max
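// Note: the SSE4.1 blendv instructions take their selection mask implicitly in
// xmm0, which is why the temporary of the SSE rule below is constrained to rxmm0.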
21208 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21209 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21210 UseAVX == 0);
21211 match(Set dst (MinV dst src));
21212 match(Set dst (MaxV src dst));
21213 effect(TEMP dst, TEMP tmp);
21214 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
21215 ins_encode %{
21216 assert(UseSSE >= 4, "required");
21217
21218 int opcode = this->ideal_Opcode();
21219 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21220 assert(elem_bt == T_LONG, "sanity");
21221
21222 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21223 %}
21224 ins_pipe( pipe_slow );
21225 %}
21226
21227 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21228 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21229 UseAVX > 0 && !VM_Version::supports_avx512vl());
21230 match(Set dst (MinV src1 src2));
21231 match(Set dst (MaxV src1 src2));
21232 effect(TEMP dst);
21233 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
21234 ins_encode %{
21235 int vlen_enc = vector_length_encoding(this);
21236 int opcode = this->ideal_Opcode();
21237 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21238 assert(elem_bt == T_LONG, "sanity");
21239
21240 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21241 %}
21242 ins_pipe( pipe_slow );
21243 %}
21244
21245 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21246 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21247 Matcher::vector_element_basic_type(n) == T_LONG);
21248 match(Set dst (MinV src1 src2));
21249 match(Set dst (MaxV src1 src2));
format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
21251 ins_encode %{
21252 assert(UseAVX > 2, "required");
21253
21254 int vlen_enc = vector_length_encoding(this);
21255 int opcode = this->ideal_Opcode();
21256 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21257 assert(elem_bt == T_LONG, "sanity");
21258
21259 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21260 %}
21261 ins_pipe( pipe_slow );
21262 %}
21263
21264 // Float/Double vector Min/Max
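// Java's Math.min/max semantics for floats and doubles (NaN propagation and
// -0.0 ordered below +0.0) do not match the legacy minps/maxps behavior. The
// AVX10.2 rule can express them directly and needs no temporaries; the older
// rules synthesize them with compare/blend sequences and extra temporaries.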
21265 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21266 predicate(VM_Version::supports_avx10_2() &&
21267 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21268 match(Set dst (MinV a b));
21269 match(Set dst (MaxV a b));
21270 format %{ "vector_minmaxFP $dst, $a, $b" %}
21271 ins_encode %{
21272 int vlen_enc = vector_length_encoding(this);
21273 int opcode = this->ideal_Opcode();
21274 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21275 __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21276 %}
21277 ins_pipe( pipe_slow );
21278 %}
21279
21280 // Float/Double vector Min/Max
21281 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21282 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21283 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21284 UseAVX > 0);
21285 match(Set dst (MinV a b));
21286 match(Set dst (MaxV a b));
21287 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21288 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21289 ins_encode %{
21290 assert(UseAVX > 0, "required");
21291
21292 int opcode = this->ideal_Opcode();
21293 int vlen_enc = vector_length_encoding(this);
21294 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21295
21296 __ vminmax_fp(opcode, elem_bt,
21297 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21298 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21299 %}
21300 ins_pipe( pipe_slow );
21301 %}
21302
21303 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21304 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21305 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21306 match(Set dst (MinV a b));
21307 match(Set dst (MaxV a b));
21308 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21309 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21310 ins_encode %{
21311 assert(UseAVX > 2, "required");
21312
21313 int opcode = this->ideal_Opcode();
21314 int vlen_enc = vector_length_encoding(this);
21315 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21316
21317 __ evminmax_fp(opcode, elem_bt,
21318 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21319 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21320 %}
21321 ins_pipe( pipe_slow );
21322 %}
21323
21324 // ------------------------------ Unsigned vector Min/Max ----------------------
21325
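// Unsigned min/max maps directly onto vpminu*/vpmaxu* for byte, short and int
// lanes; the 64-bit forms only exist with AVX-512, so without AVX512VL the long
// case is synthesized with the two vector temporaries below.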
21326 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21327 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21328 match(Set dst (UMinV a b));
21329 match(Set dst (UMaxV a b));
21330 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21331 ins_encode %{
21332 int opcode = this->ideal_Opcode();
21333 int vlen_enc = vector_length_encoding(this);
21334 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21335 assert(is_integral_type(elem_bt), "");
21336 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21337 %}
21338 ins_pipe( pipe_slow );
21339 %}
21340
21341 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21342 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21343 match(Set dst (UMinV a (LoadVector b)));
21344 match(Set dst (UMaxV a (LoadVector b)));
21345 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21346 ins_encode %{
21347 int opcode = this->ideal_Opcode();
21348 int vlen_enc = vector_length_encoding(this);
21349 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21350 assert(is_integral_type(elem_bt), "");
21351 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21352 %}
21353 ins_pipe( pipe_slow );
21354 %}
21355
21356 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21357 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21358 match(Set dst (UMinV a b));
21359 match(Set dst (UMaxV a b));
21360 effect(TEMP xtmp1, TEMP xtmp2);
format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21362 ins_encode %{
21363 int opcode = this->ideal_Opcode();
21364 int vlen_enc = vector_length_encoding(this);
21365 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21366 %}
21367 ins_pipe( pipe_slow );
21368 %}
21369
21370 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21371 match(Set dst (UMinV (Binary dst src2) mask));
21372 match(Set dst (UMaxV (Binary dst src2) mask));
21373 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21374 ins_encode %{
21375 int vlen_enc = vector_length_encoding(this);
21376 BasicType bt = Matcher::vector_element_basic_type(this);
21377 int opc = this->ideal_Opcode();
21378 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21379 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21380 %}
21381 ins_pipe( pipe_slow );
21382 %}
21383
21384 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21385 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21386 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21387 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21388 ins_encode %{
21389 int vlen_enc = vector_length_encoding(this);
21390 BasicType bt = Matcher::vector_element_basic_type(this);
21391 int opc = this->ideal_Opcode();
21392 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21393 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21394 %}
21395 ins_pipe( pipe_slow );
21396 %}
21397
21398 // --------------------------------- Signum/CopySign ---------------------------
21399
21400 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21401 match(Set dst (SignumF dst (Binary zero one)));
21402 effect(KILL cr);
21403 format %{ "signumF $dst, $dst" %}
21404 ins_encode %{
21405 int opcode = this->ideal_Opcode();
21406 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21407 %}
21408 ins_pipe( pipe_slow );
21409 %}
21410
21411 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21412 match(Set dst (SignumD dst (Binary zero one)));
21413 effect(KILL cr);
21414 format %{ "signumD $dst, $dst" %}
21415 ins_encode %{
21416 int opcode = this->ideal_Opcode();
21417 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21418 %}
21419 ins_pipe( pipe_slow );
21420 %}
21421
21422 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21423 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21424 match(Set dst (SignumVF src (Binary zero one)));
21425 match(Set dst (SignumVD src (Binary zero one)));
21426 effect(TEMP dst, TEMP xtmp1);
21427 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21428 ins_encode %{
21429 int opcode = this->ideal_Opcode();
21430 int vec_enc = vector_length_encoding(this);
21431 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21432 $xtmp1$$XMMRegister, vec_enc);
21433 %}
21434 ins_pipe( pipe_slow );
21435 %}
21436
21437 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21438 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21439 match(Set dst (SignumVF src (Binary zero one)));
21440 match(Set dst (SignumVD src (Binary zero one)));
21441 effect(TEMP dst, TEMP ktmp1);
21442 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21443 ins_encode %{
21444 int opcode = this->ideal_Opcode();
21445 int vec_enc = vector_length_encoding(this);
21446 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21447 $ktmp1$$KRegister, vec_enc);
21448 %}
21449 ins_pipe( pipe_slow );
21450 %}
21451
// ---------------------------------------
// For copySign, use 0xE4 as the truth-table immediate for vpternlog
// Desired truth table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0 (the sign bit), we want to pick from B (the sign operand)
// Wherever xmm2 is 1 (the magnitude bits), we want to pick from A (the magnitude operand)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Reading the Result column from the A=B=C=1 row down to the A=B=C=0 row gives
// 0b11100100 = 0xE4
// ---------------------------------------
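//
// For illustration (an equivalent scalar formulation, not part of the generated code):
// with A = magnitude operand, B = sign operand and C = 0x7FFFFFFF,
//   result = (A & 0x7FFFFFFF) | (B & 0x80000000)
// e.g. A = -2.0f (0xC0000000), B = 1.0f (0x3F800000) -> 0x40000000 = 2.0f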
21471
21472 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21473 match(Set dst (CopySignF dst src));
21474 effect(TEMP tmp1, TEMP tmp2);
21475 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21476 ins_encode %{
21477 __ movl($tmp2$$Register, 0x7FFFFFFF);
21478 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21479 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21480 %}
21481 ins_pipe( pipe_slow );
21482 %}
21483
21484 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21485 match(Set dst (CopySignD dst (Binary src zero)));
21486 ins_cost(100);
21487 effect(TEMP tmp1, TEMP tmp2);
21488 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21489 ins_encode %{
21490 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21491 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21492 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21493 %}
21494 ins_pipe( pipe_slow );
21495 %}
21496
21497 //----------------------------- CompressBits/ExpandBits ------------------------
21498
21499 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21500 predicate(n->bottom_type()->isa_int());
21501 match(Set dst (CompressBits src mask));
21502 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21503 ins_encode %{
21504 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21505 %}
21506 ins_pipe( pipe_slow );
21507 %}
21508
21509 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21510 predicate(n->bottom_type()->isa_int());
21511 match(Set dst (ExpandBits src mask));
21512 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21513 ins_encode %{
21514 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21515 %}
21516 ins_pipe( pipe_slow );
21517 %}
21518
21519 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21520 predicate(n->bottom_type()->isa_int());
21521 match(Set dst (CompressBits src (LoadI mask)));
21522 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21523 ins_encode %{
21524 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21525 %}
21526 ins_pipe( pipe_slow );
21527 %}
21528
21529 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21530 predicate(n->bottom_type()->isa_int());
21531 match(Set dst (ExpandBits src (LoadI mask)));
21532 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21533 ins_encode %{
21534 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21535 %}
21536 ins_pipe( pipe_slow );
21537 %}
21538
21539 // --------------------------------- Sqrt --------------------------------------
21540
21541 instruct vsqrtF_reg(vec dst, vec src) %{
21542 match(Set dst (SqrtVF src));
21543 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21544 ins_encode %{
21545 assert(UseAVX > 0, "required");
21546 int vlen_enc = vector_length_encoding(this);
21547 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21548 %}
21549 ins_pipe( pipe_slow );
21550 %}
21551
21552 instruct vsqrtF_mem(vec dst, memory mem) %{
21553 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21554 match(Set dst (SqrtVF (LoadVector mem)));
21555 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21556 ins_encode %{
21557 assert(UseAVX > 0, "required");
21558 int vlen_enc = vector_length_encoding(this);
21559 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21560 %}
21561 ins_pipe( pipe_slow );
21562 %}
21563
21564 // Floating point vector sqrt
21565 instruct vsqrtD_reg(vec dst, vec src) %{
21566 match(Set dst (SqrtVD src));
21567 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21568 ins_encode %{
21569 assert(UseAVX > 0, "required");
21570 int vlen_enc = vector_length_encoding(this);
21571 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21572 %}
21573 ins_pipe( pipe_slow );
21574 %}
21575
21576 instruct vsqrtD_mem(vec dst, memory mem) %{
21577 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21578 match(Set dst (SqrtVD (LoadVector mem)));
21579 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21580 ins_encode %{
21581 assert(UseAVX > 0, "required");
21582 int vlen_enc = vector_length_encoding(this);
21583 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21584 %}
21585 ins_pipe( pipe_slow );
21586 %}
21587
21588 // ------------------------------ Shift ---------------------------------------
21589
21590 // Left and right shift count vectors are the same on x86
21591 // (only lowest bits of xmm reg are used for count).
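// For example, psllw/psrlw/psraw (and their VEX forms) take the shift count from
// the low 64 bits of the xmm operand, so a single movdl of the scalar count below
// serves both LShiftCntV and RShiftCntV.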
21592 instruct vshiftcnt(vec dst, rRegI cnt) %{
21593 match(Set dst (LShiftCntV cnt));
21594 match(Set dst (RShiftCntV cnt));
21595 format %{ "movdl $dst,$cnt\t! load shift count" %}
21596 ins_encode %{
21597 __ movdl($dst$$XMMRegister, $cnt$$Register);
21598 %}
21599 ins_pipe( pipe_slow );
21600 %}
21601
21602 // Byte vector shift
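// x86 has no byte-granular vector shift instructions, so the byte-shift rules below
// widen the bytes to words, shift the words, mask the results back into byte range
// and re-pack them.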
21603 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21604 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21605 match(Set dst ( LShiftVB src shift));
21606 match(Set dst ( RShiftVB src shift));
21607 match(Set dst (URShiftVB src shift));
21608 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21609 format %{"vector_byte_shift $dst,$src,$shift" %}
21610 ins_encode %{
21611 assert(UseSSE > 3, "required");
21612 int opcode = this->ideal_Opcode();
21613 bool sign = (opcode != Op_URShiftVB);
21614 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21615 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21616 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21617 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21618 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21619 %}
21620 ins_pipe( pipe_slow );
21621 %}
21622
21623 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21624 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21625 UseAVX <= 1);
21626 match(Set dst ( LShiftVB src shift));
21627 match(Set dst ( RShiftVB src shift));
21628 match(Set dst (URShiftVB src shift));
21629 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21630 format %{"vector_byte_shift $dst,$src,$shift" %}
21631 ins_encode %{
21632 assert(UseSSE > 3, "required");
21633 int opcode = this->ideal_Opcode();
21634 bool sign = (opcode != Op_URShiftVB);
21635 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21636 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21637 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21638 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21639 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21640 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21641 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21642 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21643 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21644 %}
21645 ins_pipe( pipe_slow );
21646 %}
21647
21648 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21649 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21650 UseAVX > 1);
21651 match(Set dst ( LShiftVB src shift));
21652 match(Set dst ( RShiftVB src shift));
21653 match(Set dst (URShiftVB src shift));
21654 effect(TEMP dst, TEMP tmp);
21655 format %{"vector_byte_shift $dst,$src,$shift" %}
21656 ins_encode %{
21657 int opcode = this->ideal_Opcode();
21658 bool sign = (opcode != Op_URShiftVB);
21659 int vlen_enc = Assembler::AVX_256bit;
21660 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21661 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21662 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21663 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21664 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21665 %}
21666 ins_pipe( pipe_slow );
21667 %}
21668
21669 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21670 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21671 match(Set dst ( LShiftVB src shift));
21672 match(Set dst ( RShiftVB src shift));
21673 match(Set dst (URShiftVB src shift));
21674 effect(TEMP dst, TEMP tmp);
21675 format %{"vector_byte_shift $dst,$src,$shift" %}
21676 ins_encode %{
21677 assert(UseAVX > 1, "required");
21678 int opcode = this->ideal_Opcode();
21679 bool sign = (opcode != Op_URShiftVB);
21680 int vlen_enc = Assembler::AVX_256bit;
21681 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21682 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21683 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21684 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21685 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21686 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21687 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21688 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21689 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21690 %}
21691 ins_pipe( pipe_slow );
21692 %}
21693
21694 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21695 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21696 match(Set dst ( LShiftVB src shift));
21697 match(Set dst (RShiftVB src shift));
21698 match(Set dst (URShiftVB src shift));
21699 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21700 format %{"vector_byte_shift $dst,$src,$shift" %}
21701 ins_encode %{
21702 assert(UseAVX > 2, "required");
21703 int opcode = this->ideal_Opcode();
21704 bool sign = (opcode != Op_URShiftVB);
21705 int vlen_enc = Assembler::AVX_512bit;
21706 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21707 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21708 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21709 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21710 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21711 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21712 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21713 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21714 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21715 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21716 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21717 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21718 %}
21719 ins_pipe( pipe_slow );
21720 %}
21721
// A logical right shift of a short vector produces an incorrect Java result for
// negative data because Java code converts the short value into an int (with sign
// extension) before shifting. Char vectors are fine since chars are unsigned values.
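//
// For illustration: for a Java short s = -4 (0xFFFC), "s >>> 1" first widens s to
// the int 0xFFFFFFFC and then shifts, giving 0x7FFFFFFE, whereas a 16-bit vector
// shift would produce 0x7FFE; so correct results are only guaranteed for unsigned
// (char) data.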
// Shorts/Chars vector shift
21727 instruct vshiftS(vec dst, vec src, vec shift) %{
21728 predicate(!n->as_ShiftV()->is_var_shift());
21729 match(Set dst ( LShiftVS src shift));
21730 match(Set dst ( RShiftVS src shift));
21731 match(Set dst (URShiftVS src shift));
21732 effect(TEMP dst, USE src, USE shift);
21733 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21734 ins_encode %{
21735 int opcode = this->ideal_Opcode();
21736 if (UseAVX > 0) {
21737 int vlen_enc = vector_length_encoding(this);
21738 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21739 } else {
21740 int vlen = Matcher::vector_length(this);
21741 if (vlen == 2) {
21742 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21743 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21744 } else if (vlen == 4) {
21745 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21746 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21747 } else {
21748 assert (vlen == 8, "sanity");
21749 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21750 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21751 }
21752 }
21753 %}
21754 ins_pipe( pipe_slow );
21755 %}
21756
21757 // Integers vector left shift
21758 instruct vshiftI(vec dst, vec src, vec shift) %{
21759 predicate(!n->as_ShiftV()->is_var_shift());
21760 match(Set dst ( LShiftVI src shift));
21761 match(Set dst ( RShiftVI src shift));
21762 match(Set dst (URShiftVI src shift));
21763 effect(TEMP dst, USE src, USE shift);
21764 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21765 ins_encode %{
21766 int opcode = this->ideal_Opcode();
21767 if (UseAVX > 0) {
21768 int vlen_enc = vector_length_encoding(this);
21769 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21770 } else {
21771 int vlen = Matcher::vector_length(this);
21772 if (vlen == 2) {
21773 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21774 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21775 } else {
21776 assert(vlen == 4, "sanity");
21777 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21778 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21779 }
21780 }
21781 %}
21782 ins_pipe( pipe_slow );
21783 %}
21784
21785 // Integers vector left constant shift
21786 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21787 match(Set dst (LShiftVI src (LShiftCntV shift)));
21788 match(Set dst (RShiftVI src (RShiftCntV shift)));
21789 match(Set dst (URShiftVI src (RShiftCntV shift)));
21790 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21791 ins_encode %{
21792 int opcode = this->ideal_Opcode();
21793 if (UseAVX > 0) {
21794 int vector_len = vector_length_encoding(this);
21795 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21796 } else {
21797 int vlen = Matcher::vector_length(this);
21798 if (vlen == 2) {
21799 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21800 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21801 } else {
21802 assert(vlen == 4, "sanity");
21803 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21804 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21805 }
21806 }
21807 %}
21808 ins_pipe( pipe_slow );
21809 %}
21810
21811 // Longs vector shift
21812 instruct vshiftL(vec dst, vec src, vec shift) %{
21813 predicate(!n->as_ShiftV()->is_var_shift());
21814 match(Set dst ( LShiftVL src shift));
21815 match(Set dst (URShiftVL src shift));
21816 effect(TEMP dst, USE src, USE shift);
21817 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21818 ins_encode %{
21819 int opcode = this->ideal_Opcode();
21820 if (UseAVX > 0) {
21821 int vlen_enc = vector_length_encoding(this);
21822 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21823 } else {
21824 assert(Matcher::vector_length(this) == 2, "");
21825 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21826 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21827 }
21828 %}
21829 ins_pipe( pipe_slow );
21830 %}
21831
21832 // Longs vector constant shift
21833 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21834 match(Set dst (LShiftVL src (LShiftCntV shift)));
21835 match(Set dst (URShiftVL src (RShiftCntV shift)));
21836 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21837 ins_encode %{
21838 int opcode = this->ideal_Opcode();
21839 if (UseAVX > 0) {
21840 int vector_len = vector_length_encoding(this);
21841 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21842 } else {
21843 assert(Matcher::vector_length(this) == 2, "");
21844 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21845 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21846 }
21847 %}
21848 ins_pipe( pipe_slow );
21849 %}
21850
21851 // -------------------ArithmeticRightShift -----------------------------------
21852 // Long vector arithmetic right shift
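// SSE2/AVX2 have no packed 64-bit arithmetic right shift, so it is emulated below
// with the identity (x >>s n) == ((x >>u n) ^ m) - m, where m = 0x8000000000000000 >>u n:
// the logical shift result is corrected by xor/subtract with the shifted sign mask.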
21853 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21854 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21855 match(Set dst (RShiftVL src shift));
21856 effect(TEMP dst, TEMP tmp);
21857 format %{ "vshiftq $dst,$src,$shift" %}
21858 ins_encode %{
21859 uint vlen = Matcher::vector_length(this);
21860 if (vlen == 2) {
21861 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21862 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21863 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21864 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21865 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21866 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21867 } else {
21868 assert(vlen == 4, "sanity");
21869 assert(UseAVX > 1, "required");
21870 int vlen_enc = Assembler::AVX_256bit;
21871 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21872 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21873 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21874 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21875 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21876 }
21877 %}
21878 ins_pipe( pipe_slow );
21879 %}
21880
21881 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21882 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21883 match(Set dst (RShiftVL src shift));
21884 format %{ "vshiftq $dst,$src,$shift" %}
21885 ins_encode %{
21886 int vlen_enc = vector_length_encoding(this);
21887 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21888 %}
21889 ins_pipe( pipe_slow );
21890 %}
21891
21892 // ------------------- Variable Shift -----------------------------
21893 // Byte variable shift
21894 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21895 predicate(Matcher::vector_length(n) <= 8 &&
21896 n->as_ShiftV()->is_var_shift() &&
21897 !VM_Version::supports_avx512bw());
21898 match(Set dst ( LShiftVB src shift));
21899 match(Set dst ( RShiftVB src shift));
21900 match(Set dst (URShiftVB src shift));
21901 effect(TEMP dst, TEMP vtmp);
21902 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21903 ins_encode %{
21904 assert(UseAVX >= 2, "required");
21905
21906 int opcode = this->ideal_Opcode();
21907 int vlen_enc = Assembler::AVX_128bit;
21908 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21909 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21910 %}
21911 ins_pipe( pipe_slow );
21912 %}
21913
21914 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21915 predicate(Matcher::vector_length(n) == 16 &&
21916 n->as_ShiftV()->is_var_shift() &&
21917 !VM_Version::supports_avx512bw());
21918 match(Set dst ( LShiftVB src shift));
21919 match(Set dst ( RShiftVB src shift));
21920 match(Set dst (URShiftVB src shift));
21921 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21922 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21923 ins_encode %{
21924 assert(UseAVX >= 2, "required");
21925
21926 int opcode = this->ideal_Opcode();
21927 int vlen_enc = Assembler::AVX_128bit;
21928 // Shift lower half and get word result in dst
21929 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21930
21931 // Shift upper half and get word result in vtmp1
21932 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21933 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21934 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21935
21936 // Merge and down convert the two word results to byte in dst
21937 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21938 %}
21939 ins_pipe( pipe_slow );
21940 %}
21941
21942 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21943 predicate(Matcher::vector_length(n) == 32 &&
21944 n->as_ShiftV()->is_var_shift() &&
21945 !VM_Version::supports_avx512bw());
21946 match(Set dst ( LShiftVB src shift));
21947 match(Set dst ( RShiftVB src shift));
21948 match(Set dst (URShiftVB src shift));
21949 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21951 ins_encode %{
21952 assert(UseAVX >= 2, "required");
21953
21954 int opcode = this->ideal_Opcode();
21955 int vlen_enc = Assembler::AVX_128bit;
21956 // Process lower 128 bits and get result in dst
21957 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21958 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21959 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21960 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21961 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21962
21963 // Process higher 128 bits and get result in vtmp3
21964 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21965 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21966 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21967 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21968 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21969 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21970 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21971
21972 // Merge the two results in dst
21973 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21974 %}
21975 ins_pipe( pipe_slow );
21976 %}
21977
21978 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21979 predicate(Matcher::vector_length(n) <= 32 &&
21980 n->as_ShiftV()->is_var_shift() &&
21981 VM_Version::supports_avx512bw());
21982 match(Set dst ( LShiftVB src shift));
21983 match(Set dst ( RShiftVB src shift));
21984 match(Set dst (URShiftVB src shift));
21985 effect(TEMP dst, TEMP vtmp);
21986 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21987 ins_encode %{
21988 assert(UseAVX > 2, "required");
21989
21990 int opcode = this->ideal_Opcode();
21991 int vlen_enc = vector_length_encoding(this);
21992 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21993 %}
21994 ins_pipe( pipe_slow );
21995 %}
21996
21997 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21998 predicate(Matcher::vector_length(n) == 64 &&
21999 n->as_ShiftV()->is_var_shift() &&
22000 VM_Version::supports_avx512bw());
22001 match(Set dst ( LShiftVB src shift));
22002 match(Set dst ( RShiftVB src shift));
22003 match(Set dst (URShiftVB src shift));
22004 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22005 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
22006 ins_encode %{
22007 assert(UseAVX > 2, "required");
22008
22009 int opcode = this->ideal_Opcode();
22010 int vlen_enc = Assembler::AVX_256bit;
22011 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
22012 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
22013 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
22014 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
22015 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
22016 %}
22017 ins_pipe( pipe_slow );
22018 %}
22019
22020 // Short variable shift
22021 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
22022 predicate(Matcher::vector_length(n) <= 8 &&
22023 n->as_ShiftV()->is_var_shift() &&
22024 !VM_Version::supports_avx512bw());
22025 match(Set dst ( LShiftVS src shift));
22026 match(Set dst ( RShiftVS src shift));
22027 match(Set dst (URShiftVS src shift));
22028 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_var_shift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
22030 ins_encode %{
22031 assert(UseAVX >= 2, "required");
22032
22033 int opcode = this->ideal_Opcode();
22034 bool sign = (opcode != Op_URShiftVS);
22035 int vlen_enc = Assembler::AVX_256bit;
22036 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
22037 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
22038 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22039 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22040 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
22041 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22042 %}
22043 ins_pipe( pipe_slow );
22044 %}
22045
22046 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22047 predicate(Matcher::vector_length(n) == 16 &&
22048 n->as_ShiftV()->is_var_shift() &&
22049 !VM_Version::supports_avx512bw());
22050 match(Set dst ( LShiftVS src shift));
22051 match(Set dst ( RShiftVS src shift));
22052 match(Set dst (URShiftVS src shift));
22053 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_var_shift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
22055 ins_encode %{
22056 assert(UseAVX >= 2, "required");
22057
22058 int opcode = this->ideal_Opcode();
22059 bool sign = (opcode != Op_URShiftVS);
22060 int vlen_enc = Assembler::AVX_256bit;
22061 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
22062 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
22063 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22064 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22065 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22066
22067 // Shift upper half, with result in dst using vtmp1 as TEMP
22068 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
22069 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
22070 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22071 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22072 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22073 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22074
22075 // Merge lower and upper half result into dst
22076 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22077 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
22078 %}
22079 ins_pipe( pipe_slow );
22080 %}
22081
22082 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
22083 predicate(n->as_ShiftV()->is_var_shift() &&
22084 VM_Version::supports_avx512bw());
22085 match(Set dst ( LShiftVS src shift));
22086 match(Set dst ( RShiftVS src shift));
22087 match(Set dst (URShiftVS src shift));
22088 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
22089 ins_encode %{
22090 assert(UseAVX > 2, "required");
22091
22092 int opcode = this->ideal_Opcode();
22093 int vlen_enc = vector_length_encoding(this);
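    // The variable word shifts (evpsllvw/evpsrlvw/evpsravw) are AVX-512BW instructions;
    // without AVX-512VL only the 512-bit encoding exists, so widen the encoding
    // (the upper lanes are simply ignored).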
22094 if (!VM_Version::supports_avx512vl()) {
22095 vlen_enc = Assembler::AVX_512bit;
22096 }
22097 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22098 %}
22099 ins_pipe( pipe_slow );
22100 %}
22101
// Integer variable shift
22103 instruct vshiftI_var(vec dst, vec src, vec shift) %{
22104 predicate(n->as_ShiftV()->is_var_shift());
22105 match(Set dst ( LShiftVI src shift));
22106 match(Set dst ( RShiftVI src shift));
22107 match(Set dst (URShiftVI src shift));
22108 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
22109 ins_encode %{
22110 assert(UseAVX >= 2, "required");
22111
22112 int opcode = this->ideal_Opcode();
22113 int vlen_enc = vector_length_encoding(this);
22114 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22115 %}
22116 ins_pipe( pipe_slow );
22117 %}
22118
// Long variable shift
22120 instruct vshiftL_var(vec dst, vec src, vec shift) %{
22121 predicate(n->as_ShiftV()->is_var_shift());
22122 match(Set dst ( LShiftVL src shift));
22123 match(Set dst (URShiftVL src shift));
22124 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22125 ins_encode %{
22126 assert(UseAVX >= 2, "required");
22127
22128 int opcode = this->ideal_Opcode();
22129 int vlen_enc = vector_length_encoding(this);
22130 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22131 %}
22132 ins_pipe( pipe_slow );
22133 %}
22134
// Long variable arithmetic right shift
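// vpsravq is an AVX-512 instruction; with only AVX2 the variable arithmetic shift is
// emulated by varshiftq() using $vtmp as scratch, hence the two separate rules below.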
22136 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
22137 predicate(Matcher::vector_length(n) <= 4 &&
22138 n->as_ShiftV()->is_var_shift() &&
22139 UseAVX == 2);
22140 match(Set dst (RShiftVL src shift));
22141 effect(TEMP dst, TEMP vtmp);
22142 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
22143 ins_encode %{
22144 int opcode = this->ideal_Opcode();
22145 int vlen_enc = vector_length_encoding(this);
22146 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
22147 $vtmp$$XMMRegister);
22148 %}
22149 ins_pipe( pipe_slow );
22150 %}
22151
22152 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
22153 predicate(n->as_ShiftV()->is_var_shift() &&
22154 UseAVX > 2);
22155 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22157 ins_encode %{
22158 int opcode = this->ideal_Opcode();
22159 int vlen_enc = vector_length_encoding(this);
22160 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22161 %}
22162 ins_pipe( pipe_slow );
22163 %}
22164
22165 // --------------------------------- AND --------------------------------------
22166
22167 instruct vand(vec dst, vec src) %{
22168 predicate(UseAVX == 0);
22169 match(Set dst (AndV dst src));
22170 format %{ "pand $dst,$src\t! and vectors" %}
22171 ins_encode %{
22172 __ pand($dst$$XMMRegister, $src$$XMMRegister);
22173 %}
22174 ins_pipe( pipe_slow );
22175 %}
22176
22177 instruct vand_reg(vec dst, vec src1, vec src2) %{
22178 predicate(UseAVX > 0);
22179 match(Set dst (AndV src1 src2));
22180 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
22181 ins_encode %{
22182 int vlen_enc = vector_length_encoding(this);
22183 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22184 %}
22185 ins_pipe( pipe_slow );
22186 %}
22187
22188 instruct vand_mem(vec dst, vec src, memory mem) %{
22189 predicate((UseAVX > 0) &&
22190 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22191 match(Set dst (AndV src (LoadVector mem)));
22192 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
22193 ins_encode %{
22194 int vlen_enc = vector_length_encoding(this);
22195 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22196 %}
22197 ins_pipe( pipe_slow );
22198 %}
22199
22200 // --------------------------------- OR ---------------------------------------
22201
22202 instruct vor(vec dst, vec src) %{
22203 predicate(UseAVX == 0);
22204 match(Set dst (OrV dst src));
22205 format %{ "por $dst,$src\t! or vectors" %}
22206 ins_encode %{
22207 __ por($dst$$XMMRegister, $src$$XMMRegister);
22208 %}
22209 ins_pipe( pipe_slow );
22210 %}
22211
22212 instruct vor_reg(vec dst, vec src1, vec src2) %{
22213 predicate(UseAVX > 0);
22214 match(Set dst (OrV src1 src2));
22215 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
22216 ins_encode %{
22217 int vlen_enc = vector_length_encoding(this);
22218 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22219 %}
22220 ins_pipe( pipe_slow );
22221 %}
22222
22223 instruct vor_mem(vec dst, vec src, memory mem) %{
22224 predicate((UseAVX > 0) &&
22225 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22226 match(Set dst (OrV src (LoadVector mem)));
22227 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
22228 ins_encode %{
22229 int vlen_enc = vector_length_encoding(this);
22230 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22231 %}
22232 ins_pipe( pipe_slow );
22233 %}
22234
22235 // --------------------------------- XOR --------------------------------------
22236
22237 instruct vxor(vec dst, vec src) %{
22238 predicate(UseAVX == 0);
22239 match(Set dst (XorV dst src));
22240 format %{ "pxor $dst,$src\t! xor vectors" %}
22241 ins_encode %{
22242 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22243 %}
22244 ins_pipe( pipe_slow );
22245 %}
22246
22247 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22248 predicate(UseAVX > 0);
22249 match(Set dst (XorV src1 src2));
22250 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
22251 ins_encode %{
22252 int vlen_enc = vector_length_encoding(this);
22253 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22254 %}
22255 ins_pipe( pipe_slow );
22256 %}
22257
22258 instruct vxor_mem(vec dst, vec src, memory mem) %{
22259 predicate((UseAVX > 0) &&
22260 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22261 match(Set dst (XorV src (LoadVector mem)));
22262 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
22263 ins_encode %{
22264 int vlen_enc = vector_length_encoding(this);
22265 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22266 %}
22267 ins_pipe( pipe_slow );
22268 %}
22269
22270 // --------------------------------- VectorCast --------------------------------------
22271
22272 instruct vcastBtoX(vec dst, vec src) %{
22273 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22274 match(Set dst (VectorCastB2X src));
22275 format %{ "vector_cast_b2x $dst,$src\t!" %}
22276 ins_encode %{
22277 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22278 int vlen_enc = vector_length_encoding(this);
22279 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22280 %}
22281 ins_pipe( pipe_slow );
22282 %}
22283
22284 instruct vcastBtoD(legVec dst, legVec src) %{
22285 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22286 match(Set dst (VectorCastB2X src));
22287 format %{ "vector_cast_b2x $dst,$src\t!" %}
22288 ins_encode %{
22289 int vlen_enc = vector_length_encoding(this);
22290 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22291 %}
22292 ins_pipe( pipe_slow );
22293 %}
22294
22295 instruct castStoX(vec dst, vec src) %{
22296 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22297 Matcher::vector_length(n->in(1)) <= 8 && // src
22298 Matcher::vector_element_basic_type(n) == T_BYTE);
22299 match(Set dst (VectorCastS2X src));
22300 format %{ "vector_cast_s2x $dst,$src" %}
22301 ins_encode %{
22302 assert(UseAVX > 0, "required");
22303
22304 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22305 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22306 %}
22307 ins_pipe( pipe_slow );
22308 %}
22309
22310 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22311 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22312 Matcher::vector_length(n->in(1)) == 16 && // src
22313 Matcher::vector_element_basic_type(n) == T_BYTE);
22314 effect(TEMP dst, TEMP vtmp);
22315 match(Set dst (VectorCastS2X src));
22316 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22317 ins_encode %{
22318 assert(UseAVX > 0, "required");
22319
22320 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22321 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22322 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22323 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22324 %}
22325 ins_pipe( pipe_slow );
22326 %}
22327
22328 instruct vcastStoX_evex(vec dst, vec src) %{
22329 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22330 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22331 match(Set dst (VectorCastS2X src));
22332 format %{ "vector_cast_s2x $dst,$src\t!" %}
22333 ins_encode %{
22334 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22335 int src_vlen_enc = vector_length_encoding(this, $src);
22336 int vlen_enc = vector_length_encoding(this);
22337 switch (to_elem_bt) {
22338 case T_BYTE:
22339 if (!VM_Version::supports_avx512vl()) {
22340 vlen_enc = Assembler::AVX_512bit;
22341 }
22342 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22343 break;
22344 case T_INT:
22345 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22346 break;
22347 case T_FLOAT:
22348 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22349 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22350 break;
22351 case T_LONG:
22352 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22353 break;
22354 case T_DOUBLE: {
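        // short -> double quadruples the element size, so the intermediate int vector
        // only needs half the destination width before vcvtdq2pd doubles it again.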
22355 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22356 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22357 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22358 break;
22359 }
22360 default:
22361 ShouldNotReachHere();
22362 }
22363 %}
22364 ins_pipe( pipe_slow );
22365 %}
22366
22367 instruct castItoX(vec dst, vec src) %{
22368 predicate(UseAVX <= 2 &&
22369 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22370 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22371 match(Set dst (VectorCastI2X src));
22372 format %{ "vector_cast_i2x $dst,$src" %}
22373 ins_encode %{
22374 assert(UseAVX > 0, "required");
22375
22376 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22377 int vlen_enc = vector_length_encoding(this, $src);
22378
22379 if (to_elem_bt == T_BYTE) {
22380 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22381 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22382 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22383 } else {
22384 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22385 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22386 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22387 }
22388 %}
22389 ins_pipe( pipe_slow );
22390 %}
22391
22392 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22393 predicate(UseAVX <= 2 &&
22394 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22395 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22396 match(Set dst (VectorCastI2X src));
22397 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22398 effect(TEMP dst, TEMP vtmp);
22399 ins_encode %{
22400 assert(UseAVX > 0, "required");
22401
22402 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22403 int vlen_enc = vector_length_encoding(this, $src);
22404
22405 if (to_elem_bt == T_BYTE) {
22406 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22407 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22408 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22409 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22410 } else {
22411 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22412 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22413 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22414 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22415 }
22416 %}
22417 ins_pipe( pipe_slow );
22418 %}
22419
22420 instruct vcastItoX_evex(vec dst, vec src) %{
22421 predicate(UseAVX > 2 ||
22422 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22423 match(Set dst (VectorCastI2X src));
22424 format %{ "vector_cast_i2x $dst,$src\t!" %}
22425 ins_encode %{
22426 assert(UseAVX > 0, "required");
22427
22428 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22429 int src_vlen_enc = vector_length_encoding(this, $src);
22430 int dst_vlen_enc = vector_length_encoding(this);
22431 switch (dst_elem_bt) {
22432 case T_BYTE:
22433 if (!VM_Version::supports_avx512vl()) {
22434 src_vlen_enc = Assembler::AVX_512bit;
22435 }
22436 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22437 break;
22438 case T_SHORT:
22439 if (!VM_Version::supports_avx512vl()) {
22440 src_vlen_enc = Assembler::AVX_512bit;
22441 }
22442 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22443 break;
22444 case T_FLOAT:
22445 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22446 break;
22447 case T_LONG:
22448 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22449 break;
22450 case T_DOUBLE:
22451 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22452 break;
22453 default:
22454 ShouldNotReachHere();
22455 }
22456 %}
22457 ins_pipe( pipe_slow );
22458 %}
22459
22460 instruct vcastLtoBS(vec dst, vec src) %{
22461 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22462 UseAVX <= 2);
22463 match(Set dst (VectorCastL2X src));
22464 format %{ "vector_cast_l2x $dst,$src" %}
22465 ins_encode %{
22466 assert(UseAVX > 0, "required");
22467
22468 int vlen = Matcher::vector_length_in_bytes(this, $src);
22469 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22470 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22471 : ExternalAddress(vector_int_to_short_mask());
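    // The 0x8 shuffle immediate (0b00001000) gathers the low dword of each long into
    // the bottom lanes before the pack instructions narrow the result further.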
22472 if (vlen <= 16) {
22473 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22474 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22475 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22476 } else {
22477 assert(vlen <= 32, "required");
22478 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22479 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22480 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22481 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22482 }
22483 if (to_elem_bt == T_BYTE) {
22484 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22485 }
22486 %}
22487 ins_pipe( pipe_slow );
22488 %}
22489
22490 instruct vcastLtoX_evex(vec dst, vec src) %{
22491 predicate(UseAVX > 2 ||
22492 (Matcher::vector_element_basic_type(n) == T_INT ||
22493 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22494 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22495 match(Set dst (VectorCastL2X src));
22496 format %{ "vector_cast_l2x $dst,$src\t!" %}
22497 ins_encode %{
22498 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22499 int vlen = Matcher::vector_length_in_bytes(this, $src);
22500 int vlen_enc = vector_length_encoding(this, $src);
22501 switch (to_elem_bt) {
22502 case T_BYTE:
22503 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22504 vlen_enc = Assembler::AVX_512bit;
22505 }
22506 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22507 break;
22508 case T_SHORT:
22509 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22510 vlen_enc = Assembler::AVX_512bit;
22511 }
22512 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22513 break;
22514 case T_INT:
22515 if (vlen == 8) {
22516 if ($dst$$XMMRegister != $src$$XMMRegister) {
22517 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22518 }
22519 } else if (vlen == 16) {
22520 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22521 } else if (vlen == 32) {
22522 if (UseAVX > 2) {
22523 if (!VM_Version::supports_avx512vl()) {
22524 vlen_enc = Assembler::AVX_512bit;
22525 }
22526 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22527 } else {
22528 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22529 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22530 }
22531 } else { // vlen == 64
22532 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22533 }
22534 break;
22535 case T_FLOAT:
22536 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22537 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22538 break;
22539 case T_DOUBLE:
22540 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22541 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22542 break;
22543
22544 default: assert(false, "%s", type2name(to_elem_bt));
22545 }
22546 %}
22547 ins_pipe( pipe_slow );
22548 %}
22549
22550 instruct vcastFtoD_reg(vec dst, vec src) %{
22551 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22552 match(Set dst (VectorCastF2X src));
22553 format %{ "vector_cast_f2d $dst,$src\t!" %}
22554 ins_encode %{
22555 int vlen_enc = vector_length_encoding(this);
22556 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22557 %}
22558 ins_pipe( pipe_slow );
22559 %}
22560
22561
22562 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22563 predicate(!VM_Version::supports_avx10_2() &&
22564 !VM_Version::supports_avx512vl() &&
22565 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22566 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22567 is_integral_type(Matcher::vector_element_basic_type(n)));
22568 match(Set dst (VectorCastF2X src));
22569 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22570 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22571 ins_encode %{
22572 int vlen_enc = vector_length_encoding(this, $src);
22573 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading addresses
    // wider than 32 bits for register-indirect addressing, since stub constants live in the
    // code cache and ReservedCodeCacheSize is currently capped at 2G.
    // Targets are free to raise this limit, but a code cache larger than 2G is unrealistic
    // in practice; on the plus side, the cap lets us avoid allocating a temporary register,
    // which in the limiting case can prevent spilling in blocks with high register pressure.
22581 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22582 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22583 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22584 %}
22585 ins_pipe( pipe_slow );
22586 %}
22587
22588 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22589 predicate(!VM_Version::supports_avx10_2() &&
22590 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22591 is_integral_type(Matcher::vector_element_basic_type(n)));
22592 match(Set dst (VectorCastF2X src));
22593 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22594 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22595 ins_encode %{
22596 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22597 if (to_elem_bt == T_LONG) {
22598 int vlen_enc = vector_length_encoding(this);
22599 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22600 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22601 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22602 } else {
22603 int vlen_enc = vector_length_encoding(this, $src);
22604 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22605 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22606 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22607 }
22608 %}
22609 ins_pipe( pipe_slow );
22610 %}
22611
22612 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22613 predicate(VM_Version::supports_avx10_2() &&
22614 is_integral_type(Matcher::vector_element_basic_type(n)));
22615 match(Set dst (VectorCastF2X src));
22616 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22617 ins_encode %{
22618 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22619 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22620 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22621 %}
22622 ins_pipe( pipe_slow );
22623 %}
22624
22625 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22626 predicate(VM_Version::supports_avx10_2() &&
22627 is_integral_type(Matcher::vector_element_basic_type(n)));
22628 match(Set dst (VectorCastF2X (LoadVector src)));
22629 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22630 ins_encode %{
22631 int vlen = Matcher::vector_length(this);
22632 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22633 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22634 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22635 %}
22636 ins_pipe( pipe_slow );
22637 %}
22638
22639 instruct vcastDtoF_reg(vec dst, vec src) %{
22640 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22641 match(Set dst (VectorCastD2X src));
22642 format %{ "vector_cast_d2x $dst,$src\t!" %}
22643 ins_encode %{
22644 int vlen_enc = vector_length_encoding(this, $src);
22645 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22646 %}
22647 ins_pipe( pipe_slow );
22648 %}
22649
22650 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22651 predicate(!VM_Version::supports_avx10_2() &&
22652 !VM_Version::supports_avx512vl() &&
22653 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22654 is_integral_type(Matcher::vector_element_basic_type(n)));
22655 match(Set dst (VectorCastD2X src));
22656 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22657 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22658 ins_encode %{
22659 int vlen_enc = vector_length_encoding(this, $src);
22660 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22661 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22662 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22663 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22664 %}
22665 ins_pipe( pipe_slow );
22666 %}
22667
22668 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22669 predicate(!VM_Version::supports_avx10_2() &&
22670 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22671 is_integral_type(Matcher::vector_element_basic_type(n)));
22672 match(Set dst (VectorCastD2X src));
22673 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22674 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22675 ins_encode %{
22676 int vlen_enc = vector_length_encoding(this, $src);
22677 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22678 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22679 ExternalAddress(vector_float_signflip());
22680 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22681 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22682 %}
22683 ins_pipe( pipe_slow );
22684 %}
22685
22686 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22687 predicate(VM_Version::supports_avx10_2() &&
22688 is_integral_type(Matcher::vector_element_basic_type(n)));
22689 match(Set dst (VectorCastD2X src));
22690 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22691 ins_encode %{
22692 int vlen_enc = vector_length_encoding(this, $src);
22693 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22694 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22695 %}
22696 ins_pipe( pipe_slow );
22697 %}
22698
22699 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22700 predicate(VM_Version::supports_avx10_2() &&
22701 is_integral_type(Matcher::vector_element_basic_type(n)));
22702 match(Set dst (VectorCastD2X (LoadVector src)));
22703 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22704 ins_encode %{
22705 int vlen = Matcher::vector_length(this);
22706 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22707 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22708 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22709 %}
22710 ins_pipe( pipe_slow );
22711 %}
22712
22713 instruct vucast(vec dst, vec src) %{
22714 match(Set dst (VectorUCastB2X src));
22715 match(Set dst (VectorUCastS2X src));
22716 match(Set dst (VectorUCastI2X src));
22717 format %{ "vector_ucast $dst,$src\t!" %}
22718 ins_encode %{
22719 assert(UseAVX > 0, "required");
22720
22721 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22722 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22723 int vlen_enc = vector_length_encoding(this);
22724 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22725 %}
22726 ins_pipe( pipe_slow );
22727 %}
22728
22729 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22730 predicate(!VM_Version::supports_avx512vl() &&
22731 Matcher::vector_length_in_bytes(n) < 64 &&
22732 Matcher::vector_element_basic_type(n) == T_INT);
22733 match(Set dst (RoundVF src));
22734 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22735 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22736 ins_encode %{
22737 int vlen_enc = vector_length_encoding(this);
22738 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22739 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22740 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22741 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22742 %}
22743 ins_pipe( pipe_slow );
22744 %}
22745
22746 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22747 predicate((VM_Version::supports_avx512vl() ||
22748 Matcher::vector_length_in_bytes(n) == 64) &&
22749 Matcher::vector_element_basic_type(n) == T_INT);
22750 match(Set dst (RoundVF src));
22751 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22752 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22753 ins_encode %{
22754 int vlen_enc = vector_length_encoding(this);
22755 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22756 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22757 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22758 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22759 %}
22760 ins_pipe( pipe_slow );
22761 %}
22762
22763 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22764 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22765 match(Set dst (RoundVD src));
22766 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22767 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22768 ins_encode %{
22769 int vlen_enc = vector_length_encoding(this);
22770 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22771 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22772 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22773 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22774 %}
22775 ins_pipe( pipe_slow );
22776 %}
22777
22778 // --------------------------------- VectorMaskCmp --------------------------------------
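// Element-wise compare of two vectors. When the node's bottom type is not a
// vectmask, the result is materialized as a vector whose lanes are all-ones
// (true) or all-zeros (false); the 512-bit forms compare into a temporary
// opmask first and then expand it against vector_all_bits_set(). When the
// bottom type is a vectmask, the compare result is produced directly in a
// kReg opmask.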
22779
22780 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22781 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22782 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22783 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22784 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22785 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22786 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22787 ins_encode %{
22788 int vlen_enc = vector_length_encoding(this, $src1);
22789 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22790 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22791 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22792 } else {
22793 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22794 }
22795 %}
22796 ins_pipe( pipe_slow );
22797 %}
22798
22799 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22800 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22801 n->bottom_type()->isa_vectmask() == nullptr &&
22802 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22803 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22804 effect(TEMP ktmp);
22805 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22806 ins_encode %{
22807 int vlen_enc = Assembler::AVX_512bit;
22808 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22809 KRegister mask = k0; // The comparison itself is not being masked.
22810 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22811 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22812 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22813 } else {
22814 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22815 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22816 }
22817 %}
22818 ins_pipe( pipe_slow );
22819 %}
22820
22821 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22822 predicate(n->bottom_type()->isa_vectmask() &&
22823 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22824 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22825 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22826 ins_encode %{
22827 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22828 int vlen_enc = vector_length_encoding(this, $src1);
22829 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22830 KRegister mask = k0; // The comparison itself is not being masked.
22831 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22832 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22833 } else {
22834 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22835 }
22836 %}
22837 ins_pipe( pipe_slow );
22838 %}
22839
22840 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22841 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22842 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22843 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22844 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22845 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22846 (n->in(2)->get_int() == BoolTest::eq ||
22847 n->in(2)->get_int() == BoolTest::lt ||
22848 n->in(2)->get_int() == BoolTest::gt)); // cond
22849 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22850 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22851 ins_encode %{
22852 int vlen_enc = vector_length_encoding(this, $src1);
22853 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22854 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22855 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22856 %}
22857 ins_pipe( pipe_slow );
22858 %}
22859
22860 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22861 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22862 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22863 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22864 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22865 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22866 (n->in(2)->get_int() == BoolTest::ne ||
22867 n->in(2)->get_int() == BoolTest::le ||
22868 n->in(2)->get_int() == BoolTest::ge)); // cond
22869 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22870 effect(TEMP dst, TEMP xtmp);
22871 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22872 ins_encode %{
22873 int vlen_enc = vector_length_encoding(this, $src1);
22874 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22875 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22876 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22877 %}
22878 ins_pipe( pipe_slow );
22879 %}
22880
22881 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22882 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22883 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22884 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22885 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22886 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22887 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22888 effect(TEMP dst, TEMP xtmp);
22889 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22890 ins_encode %{
22891 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22892 int vlen_enc = vector_length_encoding(this, $src1);
22893 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22894 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22895
22896 if (vlen_enc == Assembler::AVX_128bit) {
22897 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22898 } else {
22899 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22900 }
22901 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22902 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22903 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22904 %}
22905 ins_pipe( pipe_slow );
22906 %}
22907
22908 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22909 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22910 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22911 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22912 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22913 effect(TEMP ktmp);
22914 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22915 ins_encode %{
22916 assert(UseAVX > 2, "required");
22917
22918 int vlen_enc = vector_length_encoding(this, $src1);
22919 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22920 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22921 KRegister mask = k0; // The comparison itself is not being masked.
22922 bool merge = false;
22923 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22924
22925 switch (src1_elem_bt) {
22926 case T_INT: {
22927 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22928 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22929 break;
22930 }
22931 case T_LONG: {
22932 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22933 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22934 break;
22935 }
22936 default: assert(false, "%s", type2name(src1_elem_bt));
22937 }
22938 %}
22939 ins_pipe( pipe_slow );
22940 %}
22941
22942
22943 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22944 predicate(n->bottom_type()->isa_vectmask() &&
22945 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22946 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22948 ins_encode %{
22949 assert(UseAVX > 2, "required");
22950 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22951
22952 int vlen_enc = vector_length_encoding(this, $src1);
22953 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22954 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22955 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22956
    // The comparison itself is not being masked, so k0 is used as the mask.
22958 switch (src1_elem_bt) {
22959 case T_BYTE: {
22960 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22961 break;
22962 }
22963 case T_SHORT: {
22964 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22965 break;
22966 }
22967 case T_INT: {
22968 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22969 break;
22970 }
22971 case T_LONG: {
22972 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22973 break;
22974 }
22975 default: assert(false, "%s", type2name(src1_elem_bt));
22976 }
22977 %}
22978 ins_pipe( pipe_slow );
22979 %}
22980
22981 // Extract
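// Extract a single scalar lane from a vector. Sources up to 128 bits are
// handled directly by get_elem(); for wider sources the 128-bit lane holding
// the element is first copied into a temporary register via get_lane() and
// the element is then extracted from that lane.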
22982
22983 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22984 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22985 match(Set dst (ExtractI src idx));
22986 match(Set dst (ExtractS src idx));
22987 match(Set dst (ExtractB src idx));
22988 format %{ "extractI $dst,$src,$idx\t!" %}
22989 ins_encode %{
22990 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22991
22992 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22993 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22994 %}
22995 ins_pipe( pipe_slow );
22996 %}
22997
22998 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22999 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
23000 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
23001 match(Set dst (ExtractI src idx));
23002 match(Set dst (ExtractS src idx));
23003 match(Set dst (ExtractB src idx));
23004 effect(TEMP vtmp);
23005 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
23006 ins_encode %{
23007 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23008
23009 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
23010 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23011 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
23012 %}
23013 ins_pipe( pipe_slow );
23014 %}
23015
23016 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
23017 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
23018 match(Set dst (ExtractL src idx));
23019 format %{ "extractL $dst,$src,$idx\t!" %}
23020 ins_encode %{
23021 assert(UseSSE >= 4, "required");
23022 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23023
23024 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
23025 %}
23026 ins_pipe( pipe_slow );
23027 %}
23028
23029 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
23030 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23031 Matcher::vector_length(n->in(1)) == 8); // src
23032 match(Set dst (ExtractL src idx));
23033 effect(TEMP vtmp);
23034 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
23035 ins_encode %{
23036 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23037
23038 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23039 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
23040 %}
23041 ins_pipe( pipe_slow );
23042 %}
23043
23044 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23045 predicate(Matcher::vector_length(n->in(1)) <= 4);
23046 match(Set dst (ExtractF src idx));
23047 effect(TEMP dst, TEMP vtmp);
23048 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23049 ins_encode %{
23050 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23051
23052 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
23053 %}
23054 ins_pipe( pipe_slow );
23055 %}
23056
23057 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23058 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
23059 Matcher::vector_length(n->in(1)/*src*/) == 16);
23060 match(Set dst (ExtractF src idx));
23061 effect(TEMP vtmp);
23062 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23063 ins_encode %{
23064 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23065
23066 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23067 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
23068 %}
23069 ins_pipe( pipe_slow );
23070 %}
23071
23072 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
23073 predicate(Matcher::vector_length(n->in(1)) == 2); // src
23074 match(Set dst (ExtractD src idx));
23075 format %{ "extractD $dst,$src,$idx\t!" %}
23076 ins_encode %{
23077 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23078
23079 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23080 %}
23081 ins_pipe( pipe_slow );
23082 %}
23083
23084 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
23085 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23086 Matcher::vector_length(n->in(1)) == 8); // src
23087 match(Set dst (ExtractD src idx));
23088 effect(TEMP vtmp);
23089 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
23090 ins_encode %{
23091 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23092
23093 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23094 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
23095 %}
23096 ins_pipe( pipe_slow );
23097 %}
23098
23099 // --------------------------------- Vector Blend --------------------------------------
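// Select lanes from two sources under a mask:
//   dst = (mask & src2) | (~mask & src1)
// i.e. lanes whose mask elements are all-ones take src2, the rest take src1.
// The SSE form uses pblendvb's implicit xmm0 mask operand; the 512-bit forms
// first convert the vector mask into an opmask and then use a merging blend.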
23100
23101 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
23102 predicate(UseAVX == 0);
23103 match(Set dst (VectorBlend (Binary dst src) mask));
23104 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
23105 effect(TEMP tmp);
23106 ins_encode %{
23107 assert(UseSSE >= 4, "required");
23108
23109 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
23110 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
23111 }
23112 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
23113 %}
23114 ins_pipe( pipe_slow );
23115 %}
23116
23117 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
23118 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23119 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23120 Matcher::vector_length_in_bytes(n) <= 32 &&
23121 is_integral_type(Matcher::vector_element_basic_type(n)));
23122 match(Set dst (VectorBlend (Binary src1 src2) mask));
23123 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23124 ins_encode %{
23125 int vlen_enc = vector_length_encoding(this);
23126 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23127 %}
23128 ins_pipe( pipe_slow );
23129 %}
23130
23131 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
23132 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23133 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23134 Matcher::vector_length_in_bytes(n) <= 32 &&
23135 !is_integral_type(Matcher::vector_element_basic_type(n)));
23136 match(Set dst (VectorBlend (Binary src1 src2) mask));
23137 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23138 ins_encode %{
23139 int vlen_enc = vector_length_encoding(this);
23140 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23141 %}
23142 ins_pipe( pipe_slow );
23143 %}
23144
23145 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
23146 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
23147 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23148 Matcher::vector_length_in_bytes(n) <= 32);
23149 match(Set dst (VectorBlend (Binary src1 src2) mask));
23150 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
23151 effect(TEMP vtmp, TEMP dst);
23152 ins_encode %{
23153 int vlen_enc = vector_length_encoding(this);
23154 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
23155 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23156 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23157 %}
23158 ins_pipe( pipe_slow );
23159 %}
23160
23161 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
23162 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
23163 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
23164 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
23166 effect(TEMP ktmp);
23167 ins_encode %{
23168 int vlen_enc = Assembler::AVX_512bit;
23169 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23170 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
23171 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23172 %}
23173 ins_pipe( pipe_slow );
23174 %}
23175
23176
23177 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
23178 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23179 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23180 VM_Version::supports_avx512bw()));
23181 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23183 ins_encode %{
23184 int vlen_enc = vector_length_encoding(this);
23185 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23186 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23187 %}
23188 ins_pipe( pipe_slow );
23189 %}
23190
23191 // --------------------------------- ABS --------------------------------------
23192 // a = |a|
23193 instruct vabsB_reg(vec dst, vec src) %{
23194 match(Set dst (AbsVB src));
23195 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23196 ins_encode %{
23197 uint vlen = Matcher::vector_length(this);
23198 if (vlen <= 16) {
23199 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23200 } else {
23201 int vlen_enc = vector_length_encoding(this);
23202 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23203 }
23204 %}
23205 ins_pipe( pipe_slow );
23206 %}
23207
23208 instruct vabsS_reg(vec dst, vec src) %{
23209 match(Set dst (AbsVS src));
23210 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23211 ins_encode %{
23212 uint vlen = Matcher::vector_length(this);
23213 if (vlen <= 8) {
23214 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23215 } else {
23216 int vlen_enc = vector_length_encoding(this);
23217 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23218 }
23219 %}
23220 ins_pipe( pipe_slow );
23221 %}
23222
23223 instruct vabsI_reg(vec dst, vec src) %{
23224 match(Set dst (AbsVI src));
23225 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23226 ins_encode %{
23227 uint vlen = Matcher::vector_length(this);
23228 if (vlen <= 4) {
23229 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23230 } else {
23231 int vlen_enc = vector_length_encoding(this);
23232 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23233 }
23234 %}
23235 ins_pipe( pipe_slow );
23236 %}
23237
23238 instruct vabsL_reg(vec dst, vec src) %{
23239 match(Set dst (AbsVL src));
23240 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23241 ins_encode %{
23242 assert(UseAVX > 2, "required");
23243 int vlen_enc = vector_length_encoding(this);
23244 if (!VM_Version::supports_avx512vl()) {
23245 vlen_enc = Assembler::AVX_512bit;
23246 }
23247 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23248 %}
23249 ins_pipe( pipe_slow );
23250 %}
23251
23252 // --------------------------------- ABSNEG --------------------------------------
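// AbsVF/NegVF and AbsVD/NegVD are matched by shared patterns; the concrete
// operation is selected from the node's ideal opcode inside vabsnegf() /
// vabsnegd(), which apply the "[mask]" constant shown in the format strings.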
23253
23254 instruct vabsnegF(vec dst, vec src) %{
23255 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23256 match(Set dst (AbsVF src));
23257 match(Set dst (NegVF src));
23258 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23259 ins_cost(150);
23260 ins_encode %{
23261 int opcode = this->ideal_Opcode();
23262 int vlen = Matcher::vector_length(this);
23263 if (vlen == 2) {
23264 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23265 } else {
23266 assert(vlen == 8 || vlen == 16, "required");
23267 int vlen_enc = vector_length_encoding(this);
23268 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23269 }
23270 %}
23271 ins_pipe( pipe_slow );
23272 %}
23273
23274 instruct vabsneg4F(vec dst) %{
23275 predicate(Matcher::vector_length(n) == 4);
23276 match(Set dst (AbsVF dst));
23277 match(Set dst (NegVF dst));
23278 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23279 ins_cost(150);
23280 ins_encode %{
23281 int opcode = this->ideal_Opcode();
23282 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23283 %}
23284 ins_pipe( pipe_slow );
23285 %}
23286
23287 instruct vabsnegD(vec dst, vec src) %{
23288 match(Set dst (AbsVD src));
23289 match(Set dst (NegVD src));
23290 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23291 ins_encode %{
23292 int opcode = this->ideal_Opcode();
23293 uint vlen = Matcher::vector_length(this);
23294 if (vlen == 2) {
23295 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23296 } else {
23297 int vlen_enc = vector_length_encoding(this);
23298 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23299 }
23300 %}
23301 ins_pipe( pipe_slow );
23302 %}
23303
23304 //------------------------------------- VectorTest --------------------------------------------
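// Set the condition flags from a vector test. The vector forms go through the
// vectortest() macro. The opmask forms implement alltrue (BoolTest::overflow)
// by comparing the significant mask bits against an all-ones value, anytrue
// (BoolTest::ne) by and-ing them so that ZF reflects the result, and wider
// masks use kortest directly.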
23305
23306 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23307 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23308 match(Set cr (VectorTest src1 src2));
23309 effect(TEMP vtmp);
23310 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23311 ins_encode %{
23312 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23313 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23314 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23315 %}
23316 ins_pipe( pipe_slow );
23317 %}
23318
23319 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23320 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23321 match(Set cr (VectorTest src1 src2));
23322 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23323 ins_encode %{
23324 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23325 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23326 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23327 %}
23328 ins_pipe( pipe_slow );
23329 %}
23330
23331 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23332 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23333 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23334 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23335 match(Set cr (VectorTest src1 src2));
23336 effect(TEMP tmp);
23337 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23338 ins_encode %{
23339 uint masklen = Matcher::vector_length(this, $src1);
23340 __ kmovwl($tmp$$Register, $src1$$KRegister);
23341 __ andl($tmp$$Register, (1 << masklen) - 1);
23342 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23343 %}
23344 ins_pipe( pipe_slow );
23345 %}
23346
23347 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23348 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23349 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23350 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23351 match(Set cr (VectorTest src1 src2));
23352 effect(TEMP tmp);
23353 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23354 ins_encode %{
23355 uint masklen = Matcher::vector_length(this, $src1);
23356 __ kmovwl($tmp$$Register, $src1$$KRegister);
23357 __ andl($tmp$$Register, (1 << masklen) - 1);
23358 %}
23359 ins_pipe( pipe_slow );
23360 %}
23361
23362 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23363 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23364 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23365 match(Set cr (VectorTest src1 src2));
23366 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23367 ins_encode %{
23368 uint masklen = Matcher::vector_length(this, $src1);
23369 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23370 %}
23371 ins_pipe( pipe_slow );
23372 %}
23373
23374 //------------------------------------- LoadMask --------------------------------------------
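// Convert a boolean vector into the mask representation expected by the
// predicated patterns: either a vector register (when the node has no
// vectmask bottom type) or a kReg opmask, chosen by the bottom type and by
// AVX512VL/BW support.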
23375
23376 instruct loadMask(legVec dst, legVec src) %{
23377 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23378 match(Set dst (VectorLoadMask src));
23379 effect(TEMP dst);
23380 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23381 ins_encode %{
23382 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23383 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23384 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23385 %}
23386 ins_pipe( pipe_slow );
23387 %}
23388
23389 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23390 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23391 match(Set dst (VectorLoadMask src));
23392 effect(TEMP xtmp);
23393 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23394 ins_encode %{
23395 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23396 true, Assembler::AVX_512bit);
23397 %}
23398 ins_pipe( pipe_slow );
23399 %}
23400
23401 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23402 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23403 match(Set dst (VectorLoadMask src));
23404 effect(TEMP xtmp);
23405 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23406 ins_encode %{
23407 int vlen_enc = vector_length_encoding(in(1));
23408 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23409 false, vlen_enc);
23410 %}
23411 ins_pipe( pipe_slow );
23412 %}
23413
23414 //------------------------------------- StoreMask --------------------------------------------
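// Narrow a mask back to a byte vector of 0/1 values; the $size operand is the
// source element size in bytes. The trailing pabsb/vpabsb maps all-ones (-1)
// lanes to 1.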
23415
23416 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23417 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23418 match(Set dst (VectorStoreMask src size));
23419 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23420 ins_encode %{
23421 int vlen = Matcher::vector_length(this);
23422 if (vlen <= 16 && UseAVX <= 2) {
23423 assert(UseSSE >= 3, "required");
23424 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23425 } else {
23426 assert(UseAVX > 0, "required");
23427 int src_vlen_enc = vector_length_encoding(this, $src);
23428 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23429 }
23430 %}
23431 ins_pipe( pipe_slow );
23432 %}
23433
23434 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23435 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23436 match(Set dst (VectorStoreMask src size));
23437 effect(TEMP_DEF dst, TEMP xtmp);
23438 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23439 ins_encode %{
23440 int vlen_enc = Assembler::AVX_128bit;
23441 int vlen = Matcher::vector_length(this);
23442 if (vlen <= 8) {
23443 assert(UseSSE >= 3, "required");
23444 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23445 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23446 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23447 } else {
23448 assert(UseAVX > 0, "required");
23449 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23450 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23451 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23452 }
23453 %}
23454 ins_pipe( pipe_slow );
23455 %}
23456
23457 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23458 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23459 match(Set dst (VectorStoreMask src size));
23460 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23461 effect(TEMP_DEF dst, TEMP xtmp);
23462 ins_encode %{
23463 int vlen_enc = Assembler::AVX_128bit;
23464 int vlen = Matcher::vector_length(this);
23465 if (vlen <= 4) {
23466 assert(UseSSE >= 3, "required");
23467 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23468 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23469 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23470 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23471 } else {
23472 assert(UseAVX > 0, "required");
23473 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23474 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23475 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23476 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23477 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23478 }
23479 %}
23480 ins_pipe( pipe_slow );
23481 %}
23482
23483 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23484 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23485 match(Set dst (VectorStoreMask src size));
23486 effect(TEMP_DEF dst, TEMP xtmp);
23487 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23488 ins_encode %{
23489 assert(UseSSE >= 3, "required");
23490 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23491 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23492 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23493 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23494 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23495 %}
23496 ins_pipe( pipe_slow );
23497 %}
23498
23499 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23500 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23501 match(Set dst (VectorStoreMask src size));
23502 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23503 effect(TEMP_DEF dst, TEMP vtmp);
23504 ins_encode %{
23505 int vlen_enc = Assembler::AVX_128bit;
23506 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23507 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23508 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23509 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23510 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23511 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23512 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23513 %}
23514 ins_pipe( pipe_slow );
23515 %}
23516
23517 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23518 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23519 match(Set dst (VectorStoreMask src size));
23520 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23521 ins_encode %{
23522 int src_vlen_enc = vector_length_encoding(this, $src);
23523 int dst_vlen_enc = vector_length_encoding(this);
23524 if (!VM_Version::supports_avx512vl()) {
23525 src_vlen_enc = Assembler::AVX_512bit;
23526 }
23527 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23528 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23529 %}
23530 ins_pipe( pipe_slow );
23531 %}
23532
23533 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23534 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23535 match(Set dst (VectorStoreMask src size));
23536 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23537 ins_encode %{
23538 int src_vlen_enc = vector_length_encoding(this, $src);
23539 int dst_vlen_enc = vector_length_encoding(this);
23540 if (!VM_Version::supports_avx512vl()) {
23541 src_vlen_enc = Assembler::AVX_512bit;
23542 }
23543 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23544 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23545 %}
23546 ins_pipe( pipe_slow );
23547 %}
23548
23549 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23550 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23551 match(Set dst (VectorStoreMask mask size));
23552 effect(TEMP_DEF dst);
23553 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23554 ins_encode %{
23555 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23556 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23557 false, Assembler::AVX_512bit, noreg);
23558 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23559 %}
23560 ins_pipe( pipe_slow );
23561 %}
23562
23563 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23564 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23565 match(Set dst (VectorStoreMask mask size));
23566 effect(TEMP_DEF dst);
23567 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23568 ins_encode %{
23569 int dst_vlen_enc = vector_length_encoding(this);
23570 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23571 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23572 %}
23573 ins_pipe( pipe_slow );
23574 %}
23575
23576 instruct vmaskcast_evex(kReg dst) %{
23577 match(Set dst (VectorMaskCast dst));
23578 ins_cost(0);
23579 format %{ "vector_mask_cast $dst" %}
23580 ins_encode %{
23581 // empty
23582 %}
23583 ins_pipe(empty);
23584 %}
23585
23586 instruct vmaskcast(vec dst) %{
23587 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23588 match(Set dst (VectorMaskCast dst));
23589 ins_cost(0);
23590 format %{ "vector_mask_cast $dst" %}
23591 ins_encode %{
23592 // empty
23593 %}
23594 ins_pipe(empty);
23595 %}
23596
23597 instruct vmaskcast_avx(vec dst, vec src) %{
23598 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23599 match(Set dst (VectorMaskCast src));
23600 format %{ "vector_mask_cast $dst, $src" %}
23601 ins_encode %{
23602 int vlen = Matcher::vector_length(this);
23603 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23604 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23605 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23606 %}
23607 ins_pipe(pipe_slow);
23608 %}
23609
23610 //-------------------------------- Load Iota Indices ----------------------------------
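// Materialize the constant index sequence 0, 1, 2, ... into a vector.
// PopulateIndex builds on it by broadcasting the start value and adding it to
// the iota constant.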
23611
23612 instruct loadIotaIndices(vec dst, immI_0 src) %{
23613 match(Set dst (VectorLoadConst src));
23614 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23615 ins_encode %{
23616 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23617 BasicType bt = Matcher::vector_element_basic_type(this);
23618 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23619 %}
23620 ins_pipe( pipe_slow );
23621 %}
23622
23623 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23624 match(Set dst (PopulateIndex src1 src2));
23625 effect(TEMP dst, TEMP vtmp);
23626 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23627 ins_encode %{
23628 assert($src2$$constant == 1, "required");
23629 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23630 int vlen_enc = vector_length_encoding(this);
23631 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23632 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23633 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23634 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23635 %}
23636 ins_pipe( pipe_slow );
23637 %}
23638
23639 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23640 match(Set dst (PopulateIndex src1 src2));
23641 effect(TEMP dst, TEMP vtmp);
23642 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23643 ins_encode %{
23644 assert($src2$$constant == 1, "required");
23645 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23646 int vlen_enc = vector_length_encoding(this);
23647 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23648 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23649 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23650 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23651 %}
23652 ins_pipe( pipe_slow );
23653 %}
23654
23655 //-------------------------------- Rearrange ----------------------------------
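// VectorLoadShuffle converts an index vector into the shuffle format expected
// by the permute instruction for the given element type (e.g. byte indices
// for pshufb); VectorRearrange then permutes the source lanes accordingly.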
23656
23657 // LoadShuffle/Rearrange for Byte
23658 instruct rearrangeB(vec dst, vec shuffle) %{
23659 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23660 Matcher::vector_length(n) < 32);
23661 match(Set dst (VectorRearrange dst shuffle));
23662 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23663 ins_encode %{
23664 assert(UseSSE >= 4, "required");
23665 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23666 %}
23667 ins_pipe( pipe_slow );
23668 %}
23669
23670 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23671 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23672 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23673 match(Set dst (VectorRearrange src shuffle));
23674 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23675 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23676 ins_encode %{
23677 assert(UseAVX >= 2, "required");
23678 // Swap src into vtmp1
23679 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23680 // Shuffle swapped src to get entries from other 128 bit lane
23681 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23682 // Shuffle original src to get entries from self 128 bit lane
23683 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23684 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23685 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23686 // Perform the blend
23687 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23688 %}
23689 ins_pipe( pipe_slow );
23690 %}
23691
23692
23693 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23694 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23695 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23696 match(Set dst (VectorRearrange src shuffle));
23697 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23699 ins_encode %{
23700 int vlen_enc = vector_length_encoding(this);
23701 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23702 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23703 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23704 %}
23705 ins_pipe( pipe_slow );
23706 %}
23707
23708 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23709 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23710 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23711 match(Set dst (VectorRearrange src shuffle));
23712 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23713 ins_encode %{
23714 int vlen_enc = vector_length_encoding(this);
23715 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23716 %}
23717 ins_pipe( pipe_slow );
23718 %}
23719
23720 // LoadShuffle/Rearrange for Short
23721
23722 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23723 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23724 !VM_Version::supports_avx512bw());
23725 match(Set dst (VectorLoadShuffle src));
23726 effect(TEMP dst, TEMP vtmp);
23727 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23728 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23731 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23732 if (UseAVX == 0) {
23733 assert(vlen_in_bytes <= 16, "required");
23734 // Multiply each shuffle by two to get byte index
23735 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23736 __ psllw($vtmp$$XMMRegister, 1);
23737
23738 // Duplicate to create 2 copies of byte index
23739 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23740 __ psllw($dst$$XMMRegister, 8);
23741 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23742
23743 // Add one to get alternate byte index
23744 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23745 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23746 } else {
23747 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23748 int vlen_enc = vector_length_encoding(this);
23749 // Multiply each shuffle by two to get byte index
23750 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23751
23752 // Duplicate to create 2 copies of byte index
23753 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23754 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23755
23756 // Add one to get alternate byte index
23757 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23758 }
23759 %}
23760 ins_pipe( pipe_slow );
23761 %}
23762
23763 instruct rearrangeS(vec dst, vec shuffle) %{
23764 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23765 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23766 match(Set dst (VectorRearrange dst shuffle));
23767 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23768 ins_encode %{
23769 assert(UseSSE >= 4, "required");
23770 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23771 %}
23772 ins_pipe( pipe_slow );
23773 %}
23774
23775 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23776 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23777 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23778 match(Set dst (VectorRearrange src shuffle));
23779 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23780 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23781 ins_encode %{
23782 assert(UseAVX >= 2, "required");
23783 // Swap src into vtmp1
23784 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23785 // Shuffle swapped src to get entries from other 128 bit lane
23786 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23787 // Shuffle original src to get entries from self 128 bit lane
23788 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23789 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23790 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23791 // Perform the blend
23792 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23793 %}
23794 ins_pipe( pipe_slow );
23795 %}
23796
23797 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23798 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23799 VM_Version::supports_avx512bw());
23800 match(Set dst (VectorRearrange src shuffle));
23801 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23802 ins_encode %{
23803 int vlen_enc = vector_length_encoding(this);
23804 if (!VM_Version::supports_avx512vl()) {
23805 vlen_enc = Assembler::AVX_512bit;
23806 }
23807 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23808 %}
23809 ins_pipe( pipe_slow );
23810 %}
23811
23812 // LoadShuffle/Rearrange for Integer and Float
23813
23814 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23815 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23816 Matcher::vector_length(n) == 4 && UseAVX == 0);
23817 match(Set dst (VectorLoadShuffle src));
23818 effect(TEMP dst, TEMP vtmp);
23819 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23820 ins_encode %{
23821 assert(UseSSE >= 4, "required");
23822
    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23825
23826 // Duplicate and multiply each shuffle by 4
23827 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23828 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23829 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23830 __ psllw($vtmp$$XMMRegister, 2);
23831
23832 // Duplicate again to create 4 copies of byte index
23833 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23834 __ psllw($dst$$XMMRegister, 8);
23835 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23836
23837 // Add 3,2,1,0 to get alternate byte index
23838 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23839 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23840 %}
23841 ins_pipe( pipe_slow );
23842 %}
23843
23844 instruct rearrangeI(vec dst, vec shuffle) %{
23845 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23846 UseAVX == 0);
23847 match(Set dst (VectorRearrange dst shuffle));
23848 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23849 ins_encode %{
23850 assert(UseSSE >= 4, "required");
23851 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23852 %}
23853 ins_pipe( pipe_slow );
23854 %}
23855
23856 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23857 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23858 UseAVX > 0);
23859 match(Set dst (VectorRearrange src shuffle));
23860 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23861 ins_encode %{
23862 int vlen_enc = vector_length_encoding(this);
23863 BasicType bt = Matcher::vector_element_basic_type(this);
23864 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23865 %}
23866 ins_pipe( pipe_slow );
23867 %}
23868
23869 // LoadShuffle/Rearrange for Long and Double
23870
23871 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23872 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23873 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23874 match(Set dst (VectorLoadShuffle src));
23875 effect(TEMP dst, TEMP vtmp);
23876 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23877 ins_encode %{
23878 assert(UseAVX >= 2, "required");
23879
23880 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
23883
23884 // Multiply each shuffle by two to get double word index
23885 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23886
23887 // Duplicate each double word shuffle
23888 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23889 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23890
23891 // Add one to get alternate double word index
23892 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23893 %}
23894 ins_pipe( pipe_slow );
23895 %}
23896
23897 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23898 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23899 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23900 match(Set dst (VectorRearrange src shuffle));
23901 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23902 ins_encode %{
23903 assert(UseAVX >= 2, "required");
23904
23905 int vlen_enc = vector_length_encoding(this);
23906 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23907 %}
23908 ins_pipe( pipe_slow );
23909 %}
23910
23911 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23912 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23913 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23914 match(Set dst (VectorRearrange src shuffle));
23915 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23916 ins_encode %{
23917 assert(UseAVX > 2, "required");
23918
23919 int vlen_enc = vector_length_encoding(this);
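    // vpermq with a vector index operand has no 128-bit encoding, so widen the
    // two-element case to a 256-bit operation; the upper lanes are don't-cares.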
23920 if (vlen_enc == Assembler::AVX_128bit) {
23921 vlen_enc = Assembler::AVX_256bit;
23922 }
23923 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23924 %}
23925 ins_pipe( pipe_slow );
23926 %}
23927
23928 // --------------------------------- FMA --------------------------------------
23929 // a * b + c
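// $c is both the accumulator input and the destination, so the vfmaf/vfmad macro
// assembler helpers are expected to emit the destination-accumulating fused forms
// (e.g. vfmadd231ps/vfmadd231pd).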
23930
23931 instruct vfmaF_reg(vec a, vec b, vec c) %{
23932 match(Set c (FmaVF c (Binary a b)));
23933 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23934 ins_cost(150);
23935 ins_encode %{
23936 assert(UseFMA, "not enabled");
23937 int vlen_enc = vector_length_encoding(this);
23938 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23939 %}
23940 ins_pipe( pipe_slow );
23941 %}
23942
23943 instruct vfmaF_mem(vec a, memory b, vec c) %{
23944 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23945 match(Set c (FmaVF c (Binary a (LoadVector b))));
23946 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23947 ins_cost(150);
23948 ins_encode %{
23949 assert(UseFMA, "not enabled");
23950 int vlen_enc = vector_length_encoding(this);
23951 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23952 %}
23953 ins_pipe( pipe_slow );
23954 %}
23955
23956 instruct vfmaD_reg(vec a, vec b, vec c) %{
23957 match(Set c (FmaVD c (Binary a b)));
23958 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23959 ins_cost(150);
23960 ins_encode %{
23961 assert(UseFMA, "not enabled");
23962 int vlen_enc = vector_length_encoding(this);
23963 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23964 %}
23965 ins_pipe( pipe_slow );
23966 %}
23967
23968 instruct vfmaD_mem(vec a, memory b, vec c) %{
23969 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23970 match(Set c (FmaVD c (Binary a (LoadVector b))));
23971 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23972 ins_cost(150);
23973 ins_encode %{
23974 assert(UseFMA, "not enabled");
23975 int vlen_enc = vector_length_encoding(this);
23976 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23977 %}
23978 ins_pipe( pipe_slow );
23979 %}
23980
23981 // --------------------------------- Vector Multiply Add --------------------------------------
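// pmaddwd multiplies adjacent signed 16-bit elements and adds each pair of 32-bit
// products, yielding one packed doubleword result per pair of words.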
23982
23983 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23984 predicate(UseAVX == 0);
23985 match(Set dst (MulAddVS2VI dst src1));
23986 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23987 ins_encode %{
23988 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23989 %}
23990 ins_pipe( pipe_slow );
23991 %}
23992
23993 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23994 predicate(UseAVX > 0);
23995 match(Set dst (MulAddVS2VI src1 src2));
23996 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23997 ins_encode %{
23998 int vlen_enc = vector_length_encoding(this);
23999 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24000 %}
24001 ins_pipe( pipe_slow );
24002 %}
24003
24004 // --------------------------------- Vector Multiply Add Add ----------------------------------
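// With AVX-512 VNNI, evpdpwssd fuses the word-pair multiply-add with the doubleword
// accumulation into $dst, covering the (AddVI (MulAddVS2VI src1 src2) dst) pattern
// in a single instruction.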
24005
24006 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
24007 predicate(VM_Version::supports_avx512_vnni());
24008 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
24009 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
24010 ins_encode %{
24011 assert(UseAVX > 2, "required");
24012 int vlen_enc = vector_length_encoding(this);
24013 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24014 %}
24015 ins_pipe( pipe_slow );
24016 ins_cost(10);
24017 %}
24018
24019 // --------------------------------- PopCount --------------------------------------
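// is_vector_popcount_predicate selects the direct AVX-512 VPOPCNT path below; other
// configurations fall back to an emulated sequence (presumably a pshufb nibble
// look-up) in vector_popcount_integral.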
24020
24021 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
24022 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24023 match(Set dst (PopCountVI src));
24024 match(Set dst (PopCountVL src));
24025 format %{ "vector_popcount_integral $dst, $src" %}
24026 ins_encode %{
24027 int opcode = this->ideal_Opcode();
24028 int vlen_enc = vector_length_encoding(this, $src);
24029 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24030 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
24031 %}
24032 ins_pipe( pipe_slow );
24033 %}
24034
24035 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
24036 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24037 match(Set dst (PopCountVI src mask));
24038 match(Set dst (PopCountVL src mask));
24039 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
24040 ins_encode %{
24041 int vlen_enc = vector_length_encoding(this, $src);
24042 BasicType bt = Matcher::vector_element_basic_type(this, $src);
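    // Copy $src first so that merge-masking leaves inactive lanes holding the
    // original source values.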
24043 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24044 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
24045 %}
24046 ins_pipe( pipe_slow );
24047 %}
24048
24049 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
24050 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24051 match(Set dst (PopCountVI src));
24052 match(Set dst (PopCountVL src));
24053 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24054 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
24055 ins_encode %{
24056 int opcode = this->ideal_Opcode();
24057 int vlen_enc = vector_length_encoding(this, $src);
24058 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24059 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24060 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
24061 %}
24062 ins_pipe( pipe_slow );
24063 %}
24064
24065 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24066
24067 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
24068 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24069 Matcher::vector_length_in_bytes(n->in(1))));
24070 match(Set dst (CountTrailingZerosV src));
24071 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
24072 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
24074 ins_encode %{
24075 int vlen_enc = vector_length_encoding(this, $src);
24076 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24077 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24078 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24079 %}
24080 ins_pipe( pipe_slow );
24081 %}
24082
24083 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24084 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24085 VM_Version::supports_avx512cd() &&
24086 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24087 match(Set dst (CountTrailingZerosV src));
24088 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24089 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24091 ins_encode %{
24092 int vlen_enc = vector_length_encoding(this, $src);
24093 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24094 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24095 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24096 %}
24097 ins_pipe( pipe_slow );
24098 %}
24099
24100 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
24101 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24102 match(Set dst (CountTrailingZerosV src));
24103 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
24104 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
24106 ins_encode %{
24107 int vlen_enc = vector_length_encoding(this, $src);
24108 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24109 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24110 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
24111 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
24112 %}
24113 ins_pipe( pipe_slow );
24114 %}
24115
24116 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24117 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24118 match(Set dst (CountTrailingZerosV src));
24119 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24120 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24121 ins_encode %{
24122 int vlen_enc = vector_length_encoding(this, $src);
24123 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24124 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24125 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24126 %}
24127 ins_pipe( pipe_slow );
24128 %}
24129
24130
24131 // --------------------------------- Bitwise Ternary Logic ----------------------------------
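// The 8-bit immediate $func is a truth table: each result bit is
// func[(a << 2) | (b << 1) | c], where a, b and c are the corresponding bits of
// $dst, $src2 and $src3.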
24132
24133 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
24134 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
24135 effect(TEMP dst);
24136 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24137 ins_encode %{
24138 int vector_len = vector_length_encoding(this);
24139 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
24140 %}
24141 ins_pipe( pipe_slow );
24142 %}
24143
24144 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
24145 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
24146 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
24147 effect(TEMP dst);
24148 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24149 ins_encode %{
24150 int vector_len = vector_length_encoding(this);
24151 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
24152 %}
24153 ins_pipe( pipe_slow );
24154 %}
24155
24156 // --------------------------------- Rotation Operations ----------------------------------
24157 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
24158 match(Set dst (RotateLeftV src shift));
24159 match(Set dst (RotateRightV src shift));
24160 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
24161 ins_encode %{
24162 int opcode = this->ideal_Opcode();
24163 int vector_len = vector_length_encoding(this);
24164 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24165 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
24166 %}
24167 ins_pipe( pipe_slow );
24168 %}
24169
24170 instruct vprorate(vec dst, vec src, vec shift) %{
24171 match(Set dst (RotateLeftV src shift));
24172 match(Set dst (RotateRightV src shift));
24173 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
24174 ins_encode %{
24175 int opcode = this->ideal_Opcode();
24176 int vector_len = vector_length_encoding(this);
24177 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24178 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
24179 %}
24180 ins_pipe( pipe_slow );
24181 %}
24182
24183 // ---------------------------------- Masked Operations ------------------------------------
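// Two flavours follow: when the mask is an ordinary vector of per-lane booleans the
// AVX masked-move path is used ($mask lives in an XMM register); when it is an
// AVX-512 opmask (vectmask) the kReg path is used instead.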
24184 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24185 predicate(!n->in(3)->bottom_type()->isa_vectmask());
24186 match(Set dst (LoadVectorMasked mem mask));
24187 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24188 ins_encode %{
24189 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24190 int vlen_enc = vector_length_encoding(this);
24191 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24192 %}
24193 ins_pipe( pipe_slow );
24194 %}
24195
24196
24197 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24198 predicate(n->in(3)->bottom_type()->isa_vectmask());
24199 match(Set dst (LoadVectorMasked mem mask));
24200 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24201 ins_encode %{
24202 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24203 int vector_len = vector_length_encoding(this);
24204 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24205 %}
24206 ins_pipe( pipe_slow );
24207 %}
24208
24209 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24210 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24211 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24212 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24213 ins_encode %{
24214 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24215 int vlen_enc = vector_length_encoding(src_node);
24216 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24217 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24218 %}
24219 ins_pipe( pipe_slow );
24220 %}
24221
24222 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24223 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24224 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24225 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24226 ins_encode %{
24227 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24228 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24229 int vlen_enc = vector_length_encoding(src_node);
24230 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24231 %}
24232 ins_pipe( pipe_slow );
24233 %}
24234
24235 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24236 match(Set addr (VerifyVectorAlignment addr mask));
24237 effect(KILL cr);
24238 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24239 ins_encode %{
24240 Label Lskip;
24241 // check if masked bits of addr are zero
24242 __ testq($addr$$Register, $mask$$constant);
24243 __ jccb(Assembler::equal, Lskip);
24244 __ stop("verify_vector_alignment found a misaligned vector memory access");
24245 __ bind(Lskip);
24246 %}
24247 ins_pipe(pipe_slow);
24248 %}
24249
24250 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24251 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24252 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24253 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24254 ins_encode %{
24255 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24256 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24257
24258 Label DONE;
24259 int vlen_enc = vector_length_encoding(this, $src1);
24260 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24261
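    // ktmp1 = lanes under $mask whose elements compare equal, ktmp2 = lanes outside $mask.
    // kortest sets CF when ktmp1|ktmp2 is all ones, i.e. every masked lane matched; in that
    // case the result stays -1, otherwise it becomes the index of the first mismatching lane.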
24262 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24263 __ mov64($dst$$Register, -1L);
24264 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24265 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24266 __ jccb(Assembler::carrySet, DONE);
24267 __ kmovql($dst$$Register, $ktmp1$$KRegister);
24268 __ notq($dst$$Register);
24269 __ tzcntq($dst$$Register, $dst$$Register);
24270 __ bind(DONE);
24271 %}
24272 ins_pipe( pipe_slow );
24273 %}
24274
24275
24276 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24277 match(Set dst (VectorMaskGen len));
24278 effect(TEMP temp, KILL cr);
24279 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24280 ins_encode %{
24281 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24282 %}
24283 ins_pipe( pipe_slow );
24284 %}
24285
24286 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24287 match(Set dst (VectorMaskGen len));
24288 format %{ "vector_mask_gen $len \t! vector mask generator" %}
24289 effect(TEMP temp);
24290 ins_encode %{
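    // Materialize a mask with the low $len bits set; a zero length yields an all-clear mask.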
24291 if ($len$$constant > 0) {
24292 __ mov64($temp$$Register, right_n_bits($len$$constant));
24293 __ kmovql($dst$$KRegister, $temp$$Register);
24294 } else {
24295 __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
24296 }
24297 %}
24298 ins_pipe( pipe_slow );
24299 %}
24300
24301 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24302 predicate(n->in(1)->bottom_type()->isa_vectmask());
24303 match(Set dst (VectorMaskToLong mask));
24304 effect(TEMP dst, KILL cr);
24305 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24306 ins_encode %{
24307 int opcode = this->ideal_Opcode();
24308 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24309 int mask_len = Matcher::vector_length(this, $mask);
24310 int mask_size = mask_len * type2aelembytes(mbt);
24311 int vlen_enc = vector_length_encoding(this, $mask);
24312 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24313 $dst$$Register, mask_len, mask_size, vlen_enc);
24314 %}
24315 ins_pipe( pipe_slow );
24316 %}
24317
24318 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24319 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24320 match(Set dst (VectorMaskToLong mask));
24321 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24322 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24323 ins_encode %{
24324 int opcode = this->ideal_Opcode();
24325 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24326 int mask_len = Matcher::vector_length(this, $mask);
24327 int vlen_enc = vector_length_encoding(this, $mask);
24328 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24329 $dst$$Register, mask_len, mbt, vlen_enc);
24330 %}
24331 ins_pipe( pipe_slow );
24332 %}
24333
24334 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24335 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24336 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24337 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24338 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24339 ins_encode %{
24340 int opcode = this->ideal_Opcode();
24341 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24342 int mask_len = Matcher::vector_length(this, $mask);
24343 int vlen_enc = vector_length_encoding(this, $mask);
24344 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24345 $dst$$Register, mask_len, mbt, vlen_enc);
24346 %}
24347 ins_pipe( pipe_slow );
24348 %}
24349
24350 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24351 predicate(n->in(1)->bottom_type()->isa_vectmask());
24352 match(Set dst (VectorMaskTrueCount mask));
24353 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24354 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24355 ins_encode %{
24356 int opcode = this->ideal_Opcode();
24357 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24358 int mask_len = Matcher::vector_length(this, $mask);
24359 int mask_size = mask_len * type2aelembytes(mbt);
24360 int vlen_enc = vector_length_encoding(this, $mask);
24361 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24362 $tmp$$Register, mask_len, mask_size, vlen_enc);
24363 %}
24364 ins_pipe( pipe_slow );
24365 %}
24366
24367 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24368 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24369 match(Set dst (VectorMaskTrueCount mask));
24370 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24371 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24372 ins_encode %{
24373 int opcode = this->ideal_Opcode();
24374 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24375 int mask_len = Matcher::vector_length(this, $mask);
24376 int vlen_enc = vector_length_encoding(this, $mask);
24377 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24378 $tmp$$Register, mask_len, mbt, vlen_enc);
24379 %}
24380 ins_pipe( pipe_slow );
24381 %}
24382
24383 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24384 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24385 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24386 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24387 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24388 ins_encode %{
24389 int opcode = this->ideal_Opcode();
24390 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24391 int mask_len = Matcher::vector_length(this, $mask);
24392 int vlen_enc = vector_length_encoding(this, $mask);
24393 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24394 $tmp$$Register, mask_len, mbt, vlen_enc);
24395 %}
24396 ins_pipe( pipe_slow );
24397 %}
24398
24399 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24400 predicate(n->in(1)->bottom_type()->isa_vectmask());
24401 match(Set dst (VectorMaskFirstTrue mask));
24402 match(Set dst (VectorMaskLastTrue mask));
24403 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24404 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24405 ins_encode %{
24406 int opcode = this->ideal_Opcode();
24407 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24408 int mask_len = Matcher::vector_length(this, $mask);
24409 int mask_size = mask_len * type2aelembytes(mbt);
24410 int vlen_enc = vector_length_encoding(this, $mask);
24411 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24412 $tmp$$Register, mask_len, mask_size, vlen_enc);
24413 %}
24414 ins_pipe( pipe_slow );
24415 %}
24416
24417 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24418 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24419 match(Set dst (VectorMaskFirstTrue mask));
24420 match(Set dst (VectorMaskLastTrue mask));
24421 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24422 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24423 ins_encode %{
24424 int opcode = this->ideal_Opcode();
24425 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24426 int mask_len = Matcher::vector_length(this, $mask);
24427 int vlen_enc = vector_length_encoding(this, $mask);
24428 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24429 $tmp$$Register, mask_len, mbt, vlen_enc);
24430 %}
24431 ins_pipe( pipe_slow );
24432 %}
24433
24434 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24435 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24436 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24437 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24438 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24439 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24440 ins_encode %{
24441 int opcode = this->ideal_Opcode();
24442 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24443 int mask_len = Matcher::vector_length(this, $mask);
24444 int vlen_enc = vector_length_encoding(this, $mask);
24445 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24446 $tmp$$Register, mask_len, mbt, vlen_enc);
24447 %}
24448 ins_pipe( pipe_slow );
24449 %}
24450
24451 // --------------------------------- Compress/Expand Operations ---------------------------
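// CompressV packs the lanes selected by $mask into the low-order lanes of $dst;
// ExpandV performs the inverse scatter of the low-order source lanes.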
24452 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24453 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24454 match(Set dst (CompressV src mask));
24455 match(Set dst (ExpandV src mask));
24456 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24458 ins_encode %{
24459 int opcode = this->ideal_Opcode();
24460 int vlen_enc = vector_length_encoding(this);
24461 BasicType bt = Matcher::vector_element_basic_type(this);
24462 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24463 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24464 %}
24465 ins_pipe( pipe_slow );
24466 %}
24467
24468 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24469 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24470 match(Set dst (CompressV src mask));
24471 match(Set dst (ExpandV src mask));
24472 format %{ "vector_compress_expand $dst, $src, $mask" %}
24473 ins_encode %{
24474 int opcode = this->ideal_Opcode();
24475 int vector_len = vector_length_encoding(this);
24476 BasicType bt = Matcher::vector_element_basic_type(this);
24477 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24478 %}
24479 ins_pipe( pipe_slow );
24480 %}
24481
24482 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24483 match(Set dst (CompressM mask));
24484 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24485 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24486 ins_encode %{
24487 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24488 int mask_len = Matcher::vector_length(this);
24489 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24490 %}
24491 ins_pipe( pipe_slow );
24492 %}
24493
24494 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24495
24496 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24497 predicate(!VM_Version::supports_gfni());
24498 match(Set dst (ReverseV src));
24499 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24501 ins_encode %{
24502 int vec_enc = vector_length_encoding(this);
24503 BasicType bt = Matcher::vector_element_basic_type(this);
24504 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24505 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24506 %}
24507 ins_pipe( pipe_slow );
24508 %}
24509
24510 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24511 predicate(VM_Version::supports_gfni());
24512 match(Set dst (ReverseV src));
24513 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24515 ins_encode %{
24516 int vec_enc = vector_length_encoding(this);
24517 BasicType bt = Matcher::vector_element_basic_type(this);
24518 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24519 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24520 $xtmp$$XMMRegister);
24521 %}
24522 ins_pipe( pipe_slow );
24523 %}
24524
24525 instruct vreverse_byte_reg(vec dst, vec src) %{
24526 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24527 match(Set dst (ReverseBytesV src));
24528 effect(TEMP dst);
24529 format %{ "vector_reverse_byte $dst, $src" %}
24530 ins_encode %{
24531 int vec_enc = vector_length_encoding(this);
24532 BasicType bt = Matcher::vector_element_basic_type(this);
24533 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24534 %}
24535 ins_pipe( pipe_slow );
24536 %}
24537
24538 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24539 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24540 match(Set dst (ReverseBytesV src));
24541 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24543 ins_encode %{
24544 int vec_enc = vector_length_encoding(this);
24545 BasicType bt = Matcher::vector_element_basic_type(this);
24546 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24547 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24548 %}
24549 ins_pipe( pipe_slow );
24550 %}
24551
24552 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24553
24554 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24555 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24556 Matcher::vector_length_in_bytes(n->in(1))));
24557 match(Set dst (CountLeadingZerosV src));
24558 format %{ "vector_count_leading_zeros $dst, $src" %}
24559 ins_encode %{
24560 int vlen_enc = vector_length_encoding(this, $src);
24561 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24562 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24563 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24564 %}
24565 ins_pipe( pipe_slow );
24566 %}
24567
24568 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24569 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24570 Matcher::vector_length_in_bytes(n->in(1))));
24571 match(Set dst (CountLeadingZerosV src mask));
24572 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24573 ins_encode %{
24574 int vlen_enc = vector_length_encoding(this, $src);
24575 BasicType bt = Matcher::vector_element_basic_type(this, $src);
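    // Pre-copy $src so that inactive lanes keep their original values under merge-masking.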
24576 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24577 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24578 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24579 %}
24580 ins_pipe( pipe_slow );
24581 %}
24582
24583 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24584 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24585 VM_Version::supports_avx512cd() &&
24586 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24587 match(Set dst (CountLeadingZerosV src));
24588 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24590 ins_encode %{
24591 int vlen_enc = vector_length_encoding(this, $src);
24592 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24593 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24594 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24595 %}
24596 ins_pipe( pipe_slow );
24597 %}
24598
24599 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24600 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24601 match(Set dst (CountLeadingZerosV src));
24602 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24604 ins_encode %{
24605 int vlen_enc = vector_length_encoding(this, $src);
24606 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24607 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24608 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24609 $rtmp$$Register, true, vlen_enc);
24610 %}
24611 ins_pipe( pipe_slow );
24612 %}
24613
24614 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24615 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24616 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24617 match(Set dst (CountLeadingZerosV src));
24618 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24619 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24620 ins_encode %{
24621 int vlen_enc = vector_length_encoding(this, $src);
24622 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24623 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24624 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24625 %}
24626 ins_pipe( pipe_slow );
24627 %}
24628
24629 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24630 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24631 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24632 match(Set dst (CountLeadingZerosV src));
24633 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24634 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24635 ins_encode %{
24636 int vlen_enc = vector_length_encoding(this, $src);
24637 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24638 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24639 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24640 %}
24641 ins_pipe( pipe_slow );
24642 %}
24643
24644 // ---------------------------------- Vector Masked Operations ------------------------------------
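// The trailing boolean passed to evmasked_op selects merge-masking, so inactive lanes
// retain the destination's previous value; the rearrange form further below passes
// false to request zero-masking instead.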
24645
24646 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24647 match(Set dst (AddVB (Binary dst src2) mask));
24648 match(Set dst (AddVS (Binary dst src2) mask));
24649 match(Set dst (AddVI (Binary dst src2) mask));
24650 match(Set dst (AddVL (Binary dst src2) mask));
24651 match(Set dst (AddVF (Binary dst src2) mask));
24652 match(Set dst (AddVD (Binary dst src2) mask));
24653 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24654 ins_encode %{
24655 int vlen_enc = vector_length_encoding(this);
24656 BasicType bt = Matcher::vector_element_basic_type(this);
24657 int opc = this->ideal_Opcode();
24658 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24659 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24660 %}
24661 ins_pipe( pipe_slow );
24662 %}
24663
24664 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24665 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24666 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24667 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24668 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24669 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24670 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24671 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24672 ins_encode %{
24673 int vlen_enc = vector_length_encoding(this);
24674 BasicType bt = Matcher::vector_element_basic_type(this);
24675 int opc = this->ideal_Opcode();
24676 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24677 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24678 %}
24679 ins_pipe( pipe_slow );
24680 %}
24681
24682 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24683 match(Set dst (XorV (Binary dst src2) mask));
24684 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24685 ins_encode %{
24686 int vlen_enc = vector_length_encoding(this);
24687 BasicType bt = Matcher::vector_element_basic_type(this);
24688 int opc = this->ideal_Opcode();
24689 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24690 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24691 %}
24692 ins_pipe( pipe_slow );
24693 %}
24694
24695 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24696 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24697 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24698 ins_encode %{
24699 int vlen_enc = vector_length_encoding(this);
24700 BasicType bt = Matcher::vector_element_basic_type(this);
24701 int opc = this->ideal_Opcode();
24702 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24703 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24704 %}
24705 ins_pipe( pipe_slow );
24706 %}
24707
24708 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24709 match(Set dst (OrV (Binary dst src2) mask));
24710 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24711 ins_encode %{
24712 int vlen_enc = vector_length_encoding(this);
24713 BasicType bt = Matcher::vector_element_basic_type(this);
24714 int opc = this->ideal_Opcode();
24715 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24716 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24717 %}
24718 ins_pipe( pipe_slow );
24719 %}
24720
24721 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24722 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24723 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24724 ins_encode %{
24725 int vlen_enc = vector_length_encoding(this);
24726 BasicType bt = Matcher::vector_element_basic_type(this);
24727 int opc = this->ideal_Opcode();
24728 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24729 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24730 %}
24731 ins_pipe( pipe_slow );
24732 %}
24733
24734 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24735 match(Set dst (AndV (Binary dst src2) mask));
24736 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24737 ins_encode %{
24738 int vlen_enc = vector_length_encoding(this);
24739 BasicType bt = Matcher::vector_element_basic_type(this);
24740 int opc = this->ideal_Opcode();
24741 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24742 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24743 %}
24744 ins_pipe( pipe_slow );
24745 %}
24746
24747 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24748 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24749 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24750 ins_encode %{
24751 int vlen_enc = vector_length_encoding(this);
24752 BasicType bt = Matcher::vector_element_basic_type(this);
24753 int opc = this->ideal_Opcode();
24754 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24755 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24756 %}
24757 ins_pipe( pipe_slow );
24758 %}
24759
24760 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24761 match(Set dst (SubVB (Binary dst src2) mask));
24762 match(Set dst (SubVS (Binary dst src2) mask));
24763 match(Set dst (SubVI (Binary dst src2) mask));
24764 match(Set dst (SubVL (Binary dst src2) mask));
24765 match(Set dst (SubVF (Binary dst src2) mask));
24766 match(Set dst (SubVD (Binary dst src2) mask));
24767 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24768 ins_encode %{
24769 int vlen_enc = vector_length_encoding(this);
24770 BasicType bt = Matcher::vector_element_basic_type(this);
24771 int opc = this->ideal_Opcode();
24772 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24773 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24774 %}
24775 ins_pipe( pipe_slow );
24776 %}
24777
24778 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24779 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24780 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24781 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24782 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24783 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24784 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24785 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24786 ins_encode %{
24787 int vlen_enc = vector_length_encoding(this);
24788 BasicType bt = Matcher::vector_element_basic_type(this);
24789 int opc = this->ideal_Opcode();
24790 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24791 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24792 %}
24793 ins_pipe( pipe_slow );
24794 %}
24795
24796 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24797 match(Set dst (MulVS (Binary dst src2) mask));
24798 match(Set dst (MulVI (Binary dst src2) mask));
24799 match(Set dst (MulVL (Binary dst src2) mask));
24800 match(Set dst (MulVF (Binary dst src2) mask));
24801 match(Set dst (MulVD (Binary dst src2) mask));
24802 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24803 ins_encode %{
24804 int vlen_enc = vector_length_encoding(this);
24805 BasicType bt = Matcher::vector_element_basic_type(this);
24806 int opc = this->ideal_Opcode();
24807 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24808 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24809 %}
24810 ins_pipe( pipe_slow );
24811 %}
24812
24813 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24814 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24815 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24816 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24817 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24818 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24819 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24820 ins_encode %{
24821 int vlen_enc = vector_length_encoding(this);
24822 BasicType bt = Matcher::vector_element_basic_type(this);
24823 int opc = this->ideal_Opcode();
24824 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24825 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24826 %}
24827 ins_pipe( pipe_slow );
24828 %}
24829
24830 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24831 match(Set dst (SqrtVF dst mask));
24832 match(Set dst (SqrtVD dst mask));
24833 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24834 ins_encode %{
24835 int vlen_enc = vector_length_encoding(this);
24836 BasicType bt = Matcher::vector_element_basic_type(this);
24837 int opc = this->ideal_Opcode();
24838 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24839 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24840 %}
24841 ins_pipe( pipe_slow );
24842 %}
24843
24844 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24845 match(Set dst (DivVF (Binary dst src2) mask));
24846 match(Set dst (DivVD (Binary dst src2) mask));
24847 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24848 ins_encode %{
24849 int vlen_enc = vector_length_encoding(this);
24850 BasicType bt = Matcher::vector_element_basic_type(this);
24851 int opc = this->ideal_Opcode();
24852 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24853 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24854 %}
24855 ins_pipe( pipe_slow );
24856 %}
24857
24858 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24859 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24860 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24861 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24862 ins_encode %{
24863 int vlen_enc = vector_length_encoding(this);
24864 BasicType bt = Matcher::vector_element_basic_type(this);
24865 int opc = this->ideal_Opcode();
24866 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24867 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24868 %}
24869 ins_pipe( pipe_slow );
24870 %}
24871
24872
24873 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24874 match(Set dst (RotateLeftV (Binary dst shift) mask));
24875 match(Set dst (RotateRightV (Binary dst shift) mask));
24876 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24877 ins_encode %{
24878 int vlen_enc = vector_length_encoding(this);
24879 BasicType bt = Matcher::vector_element_basic_type(this);
24880 int opc = this->ideal_Opcode();
24881 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24882 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24883 %}
24884 ins_pipe( pipe_slow );
24885 %}
24886
24887 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24888 match(Set dst (RotateLeftV (Binary dst src2) mask));
24889 match(Set dst (RotateRightV (Binary dst src2) mask));
24890 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24891 ins_encode %{
24892 int vlen_enc = vector_length_encoding(this);
24893 BasicType bt = Matcher::vector_element_basic_type(this);
24894 int opc = this->ideal_Opcode();
24895 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24896 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24897 %}
24898 ins_pipe( pipe_slow );
24899 %}
24900
24901 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24902 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24903 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24904 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24905 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24906 ins_encode %{
24907 int vlen_enc = vector_length_encoding(this);
24908 BasicType bt = Matcher::vector_element_basic_type(this);
24909 int opc = this->ideal_Opcode();
24910 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24911 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24912 %}
24913 ins_pipe( pipe_slow );
24914 %}
24915
24916 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24917 predicate(!n->as_ShiftV()->is_var_shift());
24918 match(Set dst (LShiftVS (Binary dst src2) mask));
24919 match(Set dst (LShiftVI (Binary dst src2) mask));
24920 match(Set dst (LShiftVL (Binary dst src2) mask));
24921 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24922 ins_encode %{
24923 int vlen_enc = vector_length_encoding(this);
24924 BasicType bt = Matcher::vector_element_basic_type(this);
24925 int opc = this->ideal_Opcode();
24926 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24927 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24928 %}
24929 ins_pipe( pipe_slow );
24930 %}
24931
24932 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24933 predicate(n->as_ShiftV()->is_var_shift());
24934 match(Set dst (LShiftVS (Binary dst src2) mask));
24935 match(Set dst (LShiftVI (Binary dst src2) mask));
24936 match(Set dst (LShiftVL (Binary dst src2) mask));
24937 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24938 ins_encode %{
24939 int vlen_enc = vector_length_encoding(this);
24940 BasicType bt = Matcher::vector_element_basic_type(this);
24941 int opc = this->ideal_Opcode();
24942 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24943 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24944 %}
24945 ins_pipe( pipe_slow );
24946 %}
24947
24948 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24949 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24950 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24951 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24952 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24953 ins_encode %{
24954 int vlen_enc = vector_length_encoding(this);
24955 BasicType bt = Matcher::vector_element_basic_type(this);
24956 int opc = this->ideal_Opcode();
24957 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24958 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24959 %}
24960 ins_pipe( pipe_slow );
24961 %}
24962
24963 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24964 predicate(!n->as_ShiftV()->is_var_shift());
24965 match(Set dst (RShiftVS (Binary dst src2) mask));
24966 match(Set dst (RShiftVI (Binary dst src2) mask));
24967 match(Set dst (RShiftVL (Binary dst src2) mask));
24968 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24969 ins_encode %{
24970 int vlen_enc = vector_length_encoding(this);
24971 BasicType bt = Matcher::vector_element_basic_type(this);
24972 int opc = this->ideal_Opcode();
24973 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24974 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24975 %}
24976 ins_pipe( pipe_slow );
24977 %}
24978
24979 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24980 predicate(n->as_ShiftV()->is_var_shift());
24981 match(Set dst (RShiftVS (Binary dst src2) mask));
24982 match(Set dst (RShiftVI (Binary dst src2) mask));
24983 match(Set dst (RShiftVL (Binary dst src2) mask));
24984 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24985 ins_encode %{
24986 int vlen_enc = vector_length_encoding(this);
24987 BasicType bt = Matcher::vector_element_basic_type(this);
24988 int opc = this->ideal_Opcode();
24989 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24990 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24991 %}
24992 ins_pipe( pipe_slow );
24993 %}
24994
24995 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24996 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24997 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24998 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24999 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
25000 ins_encode %{
25001 int vlen_enc = vector_length_encoding(this);
25002 BasicType bt = Matcher::vector_element_basic_type(this);
25003 int opc = this->ideal_Opcode();
25004 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25005 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
25006 %}
25007 ins_pipe( pipe_slow );
25008 %}
25009
25010 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
25011 predicate(!n->as_ShiftV()->is_var_shift());
25012 match(Set dst (URShiftVS (Binary dst src2) mask));
25013 match(Set dst (URShiftVI (Binary dst src2) mask));
25014 match(Set dst (URShiftVL (Binary dst src2) mask));
25015 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25016 ins_encode %{
25017 int vlen_enc = vector_length_encoding(this);
25018 BasicType bt = Matcher::vector_element_basic_type(this);
25019 int opc = this->ideal_Opcode();
25020 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25021 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
25022 %}
25023 ins_pipe( pipe_slow );
25024 %}
25025
25026 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
25027 predicate(n->as_ShiftV()->is_var_shift());
25028 match(Set dst (URShiftVS (Binary dst src2) mask));
25029 match(Set dst (URShiftVI (Binary dst src2) mask));
25030 match(Set dst (URShiftVL (Binary dst src2) mask));
25031 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25032 ins_encode %{
25033 int vlen_enc = vector_length_encoding(this);
25034 BasicType bt = Matcher::vector_element_basic_type(this);
25035 int opc = this->ideal_Opcode();
25036 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25037 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
25038 %}
25039 ins_pipe( pipe_slow );
25040 %}
25041
25042 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
25043 match(Set dst (MaxV (Binary dst src2) mask));
25044 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25045 ins_encode %{
25046 int vlen_enc = vector_length_encoding(this);
25047 BasicType bt = Matcher::vector_element_basic_type(this);
25048 int opc = this->ideal_Opcode();
25049 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25050 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25051 %}
25052 ins_pipe( pipe_slow );
25053 %}
25054
25055 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
25056 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
25057 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25058 ins_encode %{
25059 int vlen_enc = vector_length_encoding(this);
25060 BasicType bt = Matcher::vector_element_basic_type(this);
25061 int opc = this->ideal_Opcode();
25062 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25063 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25064 %}
25065 ins_pipe( pipe_slow );
25066 %}
25067
25068 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
25069 match(Set dst (MinV (Binary dst src2) mask));
25070 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25071 ins_encode %{
25072 int vlen_enc = vector_length_encoding(this);
25073 BasicType bt = Matcher::vector_element_basic_type(this);
25074 int opc = this->ideal_Opcode();
25075 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25076 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25077 %}
25078 ins_pipe( pipe_slow );
25079 %}
25080
25081 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
25082 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
25083 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25084 ins_encode %{
25085 int vlen_enc = vector_length_encoding(this);
25086 BasicType bt = Matcher::vector_element_basic_type(this);
25087 int opc = this->ideal_Opcode();
25088 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25089 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25090 %}
25091 ins_pipe( pipe_slow );
25092 %}
25093
25094 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
25095 match(Set dst (VectorRearrange (Binary dst src2) mask));
25096 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
25097 ins_encode %{
25098 int vlen_enc = vector_length_encoding(this);
25099 BasicType bt = Matcher::vector_element_basic_type(this);
25100 int opc = this->ideal_Opcode();
25101 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25102 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25103 %}
25104 ins_pipe( pipe_slow );
25105 %}
25106
25107 instruct vabs_masked(vec dst, kReg mask) %{
25108 match(Set dst (AbsVB dst mask));
25109 match(Set dst (AbsVS dst mask));
25110 match(Set dst (AbsVI dst mask));
25111 match(Set dst (AbsVL dst mask));
25112 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
25113 ins_encode %{
25114 int vlen_enc = vector_length_encoding(this);
25115 BasicType bt = Matcher::vector_element_basic_type(this);
25116 int opc = this->ideal_Opcode();
25117 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25118 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
25119 %}
25120 ins_pipe( pipe_slow );
25121 %}
25122
25123 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
25124 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
25125 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
25126 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25127 ins_encode %{
25128 assert(UseFMA, "Needs FMA instructions support.");
25129 int vlen_enc = vector_length_encoding(this);
25130 BasicType bt = Matcher::vector_element_basic_type(this);
25131 int opc = this->ideal_Opcode();
25132 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25133 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
25134 %}
25135 ins_pipe( pipe_slow );
25136 %}
25137
25138 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
25139 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
25140 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
25141 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25142 ins_encode %{
25143 assert(UseFMA, "Needs FMA instructions support.");
25144 int vlen_enc = vector_length_encoding(this);
25145 BasicType bt = Matcher::vector_element_basic_type(this);
25146 int opc = this->ideal_Opcode();
25147 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25148 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
25149 %}
25150 ins_pipe( pipe_slow );
25151 %}
25152
25153 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
25154 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
25155 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
25156 ins_encode %{
25157 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
25158 int vlen_enc = vector_length_encoding(this, $src1);
25159 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
25160
    // Emit the masked comparison according to the element type of src1.
25162 switch (src1_elem_bt) {
25163 case T_BYTE: {
25164 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25165 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25166 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25167 break;
25168 }
25169 case T_SHORT: {
25170 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25171 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25172 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25173 break;
25174 }
25175 case T_INT: {
25176 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25177 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25178 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25179 break;
25180 }
25181 case T_LONG: {
25182 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25183 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25184 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25185 break;
25186 }
25187 case T_FLOAT: {
25188 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25189 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25190 break;
25191 }
25192 case T_DOUBLE: {
25193 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25194 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25195 break;
25196 }
25197 default: assert(false, "%s", type2name(src1_elem_bt)); break;
25198 }
25199 %}
25200 ins_pipe( pipe_slow );
25201 %}
25202
25203 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25204 predicate(Matcher::vector_length(n) <= 32);
25205 match(Set dst (MaskAll src));
25206 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
25207 ins_encode %{
25208 int mask_len = Matcher::vector_length(this);
25209 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25210 %}
25211 ins_pipe( pipe_slow );
25212 %}
25213
25214 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25215 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25216 match(Set dst (XorVMask src (MaskAll cnt)));
25217 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25218 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25219 ins_encode %{
25220 uint masklen = Matcher::vector_length(this);
25221 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25222 %}
25223 ins_pipe( pipe_slow );
25224 %}
25225
25226 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25227 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25228 (Matcher::vector_length(n) == 16) ||
25229 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25230 match(Set dst (XorVMask src (MaskAll cnt)));
25231 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25232 ins_encode %{
25233 uint masklen = Matcher::vector_length(this);
25234 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25235 %}
25236 ins_pipe( pipe_slow );
25237 %}
25238
25239 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25240 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25241 match(Set dst (VectorLongToMask src));
25242 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25243 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25244 ins_encode %{
25245 int mask_len = Matcher::vector_length(this);
25246 int vec_enc = vector_length_encoding(mask_len);
25247 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25248 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25249 %}
25250 ins_pipe( pipe_slow );
25251 %}
25252
25253
25254 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25255 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25256 match(Set dst (VectorLongToMask src));
25257 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
25259 ins_encode %{
25260 int mask_len = Matcher::vector_length(this);
25261 assert(mask_len <= 32, "invalid mask length");
25262 int vec_enc = vector_length_encoding(mask_len);
25263 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25264 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25265 %}
25266 ins_pipe( pipe_slow );
25267 %}
25268
25269 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25270 predicate(n->bottom_type()->isa_vectmask());
25271 match(Set dst (VectorLongToMask src));
25272 format %{ "long_to_mask_evex $dst, $src\t!" %}
25273 ins_encode %{
25274 __ kmov($dst$$KRegister, $src$$Register);
25275 %}
25276 ins_pipe( pipe_slow );
25277 %}
25278
25279 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25280 match(Set dst (AndVMask src1 src2));
25281 match(Set dst (OrVMask src1 src2));
25282 match(Set dst (XorVMask src1 src2));
25283 effect(TEMP kscratch);
25284 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25285 ins_encode %{
25286 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25287 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25288 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25289 uint masklen = Matcher::vector_length(this);
25290 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25291 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25292 %}
25293 ins_pipe( pipe_slow );
25294 %}
25295
25296 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25297 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25298 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25299 ins_encode %{
25300 int vlen_enc = vector_length_encoding(this);
25301 BasicType bt = Matcher::vector_element_basic_type(this);
25302 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25303 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25304 %}
25305 ins_pipe( pipe_slow );
25306 %}
25307
25308 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25309 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25310 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25311 ins_encode %{
25312 int vlen_enc = vector_length_encoding(this);
25313 BasicType bt = Matcher::vector_element_basic_type(this);
25314 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25315 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25316 %}
25317 ins_pipe( pipe_slow );
25318 %}
25319
25320 instruct castMM(kReg dst)
25321 %{
25322 match(Set dst (CastVV dst));
25323
25324 size(0);
25325 format %{ "# castVV of $dst" %}
25326 ins_encode(/* empty encoding */);
25327 ins_cost(0);
25328 ins_pipe(empty);
25329 %}
25330
25331 instruct castVV(vec dst)
25332 %{
25333 match(Set dst (CastVV dst));
25334
25335 size(0);
25336 format %{ "# castVV of $dst" %}
25337 ins_encode(/* empty encoding */);
25338 ins_cost(0);
25339 ins_pipe(empty);
25340 %}
25341
25342 instruct castVVLeg(legVec dst)
25343 %{
25344 match(Set dst (CastVV dst));
25345
25346 size(0);
25347 format %{ "# castVV of $dst" %}
25348 ins_encode(/* empty encoding */);
25349 ins_cost(0);
25350 ins_pipe(empty);
25351 %}
25352
25353 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25354 %{
25355 match(Set dst (IsInfiniteF src));
25356 effect(TEMP ktmp, KILL cr);
25357 format %{ "float_class_check $dst, $src" %}
25358 ins_encode %{
25359 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25360 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25361 %}
25362 ins_pipe(pipe_slow);
25363 %}
25364
25365 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25366 %{
25367 match(Set dst (IsInfiniteD src));
25368 effect(TEMP ktmp, KILL cr);
25369 format %{ "double_class_check $dst, $src" %}
25370 ins_encode %{
25371 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25372 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25373 %}
25374 ins_pipe(pipe_slow);
25375 %}
25376
25377 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25378 %{
25379 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25380 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25381 match(Set dst (SaturatingAddV src1 src2));
25382 match(Set dst (SaturatingSubV src1 src2));
25383 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25384 ins_encode %{
25385 int vlen_enc = vector_length_encoding(this);
25386 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25387 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25388 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25389 %}
25390 ins_pipe(pipe_slow);
25391 %}
25392
25393 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25394 %{
25395 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25396 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25397 match(Set dst (SaturatingAddV src1 src2));
25398 match(Set dst (SaturatingSubV src1 src2));
25399 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25400 ins_encode %{
25401 int vlen_enc = vector_length_encoding(this);
25402 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25403 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25404 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25405 %}
25406 ins_pipe(pipe_slow);
25407 %}
25408
25409 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25410 %{
25411 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25412 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25413 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25414 match(Set dst (SaturatingAddV src1 src2));
25415 match(Set dst (SaturatingSubV src1 src2));
25416 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25417 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25418 ins_encode %{
25419 int vlen_enc = vector_length_encoding(this);
25420 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25421 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25422 $src1$$XMMRegister, $src2$$XMMRegister,
25423 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25424 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25425 %}
25426 ins_pipe(pipe_slow);
25427 %}
25428
25429 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25430 %{
25431 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25432 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25433 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25434 match(Set dst (SaturatingAddV src1 src2));
25435 match(Set dst (SaturatingSubV src1 src2));
25436 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25437 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25438 ins_encode %{
25439 int vlen_enc = vector_length_encoding(this);
25440 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25441 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25442 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25443 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25444 %}
25445 ins_pipe(pipe_slow);
25446 %}
25447
25448 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25449 %{
25450 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25451 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25452 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25453 match(Set dst (SaturatingAddV src1 src2));
25454 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25455 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25456 ins_encode %{
25457 int vlen_enc = vector_length_encoding(this);
25458 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25459 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25460 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25461 %}
25462 ins_pipe(pipe_slow);
25463 %}
25464
25465 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25466 %{
25467 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25468 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25469 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25470 match(Set dst (SaturatingAddV src1 src2));
25471 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25472 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25473 ins_encode %{
25474 int vlen_enc = vector_length_encoding(this);
25475 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25476 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25477 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25478 %}
25479 ins_pipe(pipe_slow);
25480 %}
25481
25482 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25483 %{
25484 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25485 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25486 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25487 match(Set dst (SaturatingSubV src1 src2));
25488 effect(TEMP ktmp);
25489 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25490 ins_encode %{
25491 int vlen_enc = vector_length_encoding(this);
25492 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25493 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25494 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25495 %}
25496 ins_pipe(pipe_slow);
25497 %}
25498
25499 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25500 %{
25501 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25502 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25503 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25504 match(Set dst (SaturatingSubV src1 src2));
25505 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25506 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25507 ins_encode %{
25508 int vlen_enc = vector_length_encoding(this);
25509 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25510 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25511 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25512 %}
25513 ins_pipe(pipe_slow);
25514 %}
25515
25516 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25517 %{
25518 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25519 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25520 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25521 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25522 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25523 ins_encode %{
25524 int vlen_enc = vector_length_encoding(this);
25525 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25526 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25527 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25528 %}
25529 ins_pipe(pipe_slow);
25530 %}
25531
25532 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25533 %{
25534 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25535 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25536 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25537 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25538 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25539 ins_encode %{
25540 int vlen_enc = vector_length_encoding(this);
25541 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25542 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25543 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25544 %}
25545 ins_pipe(pipe_slow);
25546 %}
25547
25548 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25549 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25550 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25551 match(Set dst (SaturatingAddV (Binary dst src) mask));
25552 match(Set dst (SaturatingSubV (Binary dst src) mask));
25553 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25554 ins_encode %{
25555 int vlen_enc = vector_length_encoding(this);
25556 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25557 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25558 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25559 %}
25560 ins_pipe( pipe_slow );
25561 %}
25562
25563 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25564 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25565 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25566 match(Set dst (SaturatingAddV (Binary dst src) mask));
25567 match(Set dst (SaturatingSubV (Binary dst src) mask));
25568 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25569 ins_encode %{
25570 int vlen_enc = vector_length_encoding(this);
25571 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25572 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25573 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25574 %}
25575 ins_pipe( pipe_slow );
25576 %}
25577
25578 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25579 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25580 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25581 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25582 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25583 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25584 ins_encode %{
25585 int vlen_enc = vector_length_encoding(this);
25586 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25587 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25588 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25589 %}
25590 ins_pipe( pipe_slow );
25591 %}
25592
25593 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25594 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25595 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25596 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25597 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25598 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25599 ins_encode %{
25600 int vlen_enc = vector_length_encoding(this);
25601 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25602 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25603 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25604 %}
25605 ins_pipe( pipe_slow );
25606 %}
25607
25608 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25609 %{
25610 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25611 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25612 ins_encode %{
25613 int vlen_enc = vector_length_encoding(this);
25614 BasicType bt = Matcher::vector_element_basic_type(this);
25615 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25616 %}
25617 ins_pipe(pipe_slow);
25618 %}
25619
25620 instruct reinterpretS2HF(regF dst, rRegI src)
25621 %{
25622 match(Set dst (ReinterpretS2HF src));
25623 format %{ "evmovw $dst, $src" %}
25624 ins_encode %{
25625 __ evmovw($dst$$XMMRegister, $src$$Register);
25626 %}
25627 ins_pipe(pipe_slow);
25628 %}
25629
25630 instruct reinterpretHF2S(rRegI dst, regF src)
25631 %{
25632 match(Set dst (ReinterpretHF2S src));
25633 format %{ "evmovw $dst, $src" %}
25634 ins_encode %{
25635 __ evmovw($dst$$Register, $src$$XMMRegister);
25636 %}
25637 ins_pipe(pipe_slow);
25638 %}
25639
25640 instruct convF2HFAndS2HF(regF dst, regF src)
25641 %{
25642 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25643 format %{ "convF2HFAndS2HF $dst, $src" %}
25644 ins_encode %{
25645 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25646 %}
25647 ins_pipe(pipe_slow);
25648 %}
25649
25650 instruct convHF2SAndHF2F(regF dst, regF src)
25651 %{
25652 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25653 format %{ "convHF2SAndHF2F $dst, $src" %}
25654 ins_encode %{
25655 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25656 %}
25657 ins_pipe(pipe_slow);
25658 %}
25659
25660 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25661 %{
25662 match(Set dst (SqrtHF src));
25663 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25664 ins_encode %{
25665 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25666 %}
25667 ins_pipe(pipe_slow);
25668 %}
25669
25670 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25671 %{
25672 match(Set dst (AddHF src1 src2));
25673 match(Set dst (DivHF src1 src2));
25674 match(Set dst (MulHF src1 src2));
25675 match(Set dst (SubHF src1 src2));
25676 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25677 ins_encode %{
25678 int opcode = this->ideal_Opcode();
25679 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25680 %}
25681 ins_pipe(pipe_slow);
25682 %}
25683
25684 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25685 %{
25686 predicate(VM_Version::supports_avx10_2());
25687 match(Set dst (MaxHF src1 src2));
25688 match(Set dst (MinHF src1 src2));
25689
25690 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25691 ins_encode %{
25692 int opcode = this->ideal_Opcode();
25693 __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25694 %}
25695 ins_pipe( pipe_slow );
25696 %}
25697
25698 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25699 %{
25700 predicate(!VM_Version::supports_avx10_2());
25701 match(Set dst (MaxHF src1 src2));
25702 match(Set dst (MinHF src1 src2));
25703 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25704
25705 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25706 ins_encode %{
25707 int opcode = this->ideal_Opcode();
25708 __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25709 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25710 %}
25711 ins_pipe( pipe_slow );
25712 %}
25713
25714 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25715 %{
25716 match(Set dst (FmaHF src2 (Binary dst src1)));
25717 effect(DEF dst);
25718 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25719 ins_encode %{
25720 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25721 %}
25722 ins_pipe( pipe_slow );
25723 %}
25724
25725
25726 instruct vector_sqrt_HF_reg(vec dst, vec src)
25727 %{
25728 match(Set dst (SqrtVHF src));
25729 format %{ "vector_sqrt_fp16 $dst, $src" %}
25730 ins_encode %{
25731 int vlen_enc = vector_length_encoding(this);
25732 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25733 %}
25734 ins_pipe(pipe_slow);
25735 %}
25736
25737 instruct vector_sqrt_HF_mem(vec dst, memory src)
25738 %{
25739 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25740 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25741 ins_encode %{
25742 int vlen_enc = vector_length_encoding(this);
25743 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25744 %}
25745 ins_pipe(pipe_slow);
25746 %}
25747
25748 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25749 %{
25750 match(Set dst (AddVHF src1 src2));
25751 match(Set dst (DivVHF src1 src2));
25752 match(Set dst (MulVHF src1 src2));
25753 match(Set dst (SubVHF src1 src2));
25754 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25755 ins_encode %{
25756 int vlen_enc = vector_length_encoding(this);
25757 int opcode = this->ideal_Opcode();
25758 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25759 %}
25760 ins_pipe(pipe_slow);
25761 %}
25762
25763
25764 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25765 %{
25766 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25767 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25768 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25769 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25770 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25771 ins_encode %{
25772 int vlen_enc = vector_length_encoding(this);
25773 int opcode = this->ideal_Opcode();
25774 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25775 %}
25776 ins_pipe(pipe_slow);
25777 %}
25778
25779 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25780 %{
25781 match(Set dst (FmaVHF src2 (Binary dst src1)));
25782 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25783 ins_encode %{
25784 int vlen_enc = vector_length_encoding(this);
25785 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25786 %}
25787 ins_pipe( pipe_slow );
25788 %}
25789
25790 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25791 %{
25792 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25793 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25794 ins_encode %{
25795 int vlen_enc = vector_length_encoding(this);
25796 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25797 %}
25798 ins_pipe( pipe_slow );
25799 %}
25800
25801 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25802 %{
25803 predicate(VM_Version::supports_avx10_2());
25804 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25805 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25806 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25807 ins_encode %{
25808 int vlen_enc = vector_length_encoding(this);
25809 int opcode = this->ideal_Opcode();
25810 __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25811 k0, vlen_enc);
25812 %}
25813 ins_pipe( pipe_slow );
25814 %}
25815
25816 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25817 %{
25818 predicate(VM_Version::supports_avx10_2());
25819 match(Set dst (MinVHF src1 src2));
25820 match(Set dst (MaxVHF src1 src2));
25821 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25822 ins_encode %{
25823 int vlen_enc = vector_length_encoding(this);
25824 int opcode = this->ideal_Opcode();
25825 __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25826 k0, vlen_enc);
25827 %}
25828 ins_pipe( pipe_slow );
25829 %}
25830
25831 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25832 %{
25833 predicate(!VM_Version::supports_avx10_2());
25834 match(Set dst (MinVHF src1 src2));
25835 match(Set dst (MaxVHF src1 src2));
25836 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25837 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25838 ins_encode %{
25839 int vlen_enc = vector_length_encoding(this);
25840 int opcode = this->ideal_Opcode();
25841 __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25842 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25843 %}
25844 ins_pipe( pipe_slow );
25845 %}
25846
25847 //----------PEEPHOLE RULES-----------------------------------------------------
25848 // These must follow all instruction definitions as they use the names
25849 // defined in the instructions definitions.
25850 //
25851 // peeppredicate ( rule_predicate );
// // the predicate; if it evaluates to false, the peephole rule is ignored
25853 //
25854 // peepmatch ( root_instr_name [preceding_instruction]* );
25855 //
25856 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, return a new
// // node as defined in peepreplace, and the rule numbers of the nodes appearing
// // in the corresponding peepmatch. The procedure returns true if successful,
// // else it returns false.
25865 //
25866 // peepconstraint %{
25867 // (instruction_number.operand_name relational_op instruction_number.operand_name
25868 // [, ...] );
25869 // // instruction numbers are zero-based using left to right order in peepmatch
25870 //
25871 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25872 // // provide an instruction_number.operand_name for each operand that appears
25873 // // in the replacement instruction's match rule
25874 //
25875 // ---------VM FLAGS---------------------------------------------------------
25876 //
25877 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25878 //
25879 // Each peephole rule is given an identifying number starting with zero and
25880 // increasing by one in the order seen by the parser. An individual peephole
25881 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25882 // on the command-line.
25883 //
25884 // ---------CURRENT LIMITATIONS----------------------------------------------
25885 //
25886 // Only transformations inside a basic block (do we need more for peephole)
25887 //
25888 // ---------EXAMPLE----------------------------------------------------------
25889 //
25890 // // pertinent parts of existing instructions in architecture description
25891 // instruct movI(rRegI dst, rRegI src)
25892 // %{
25893 // match(Set dst (CopyI src));
25894 // %}
25895 //
25896 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25897 // %{
25898 // match(Set dst (AddI dst src));
25899 // effect(KILL cr);
25900 // %}
25901 //
25902 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25903 // %{
25904 // match(Set dst (AddI dst src));
25905 // %}
25906 //
25907 // 1. Simple replacement
25908 // - Only match adjacent instructions in same basic block
25909 // - Only equality constraints
25910 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25911 // - Only one replacement instruction
25912 //
25913 // // Change (inc mov) to lea
25914 // peephole %{
25915 // // lea should only be emitted when beneficial
25916 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25917 // // increment preceded by register-register move
25918 // peepmatch ( incI_rReg movI );
25919 // // require that the destination register of the increment
25920 // // match the destination register of the move
25921 // peepconstraint ( 0.dst == 1.dst );
25922 // // construct a replacement instruction that sets
25923 // // the destination to ( move's source register + one )
25924 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25925 // %}
25926 //
25927 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25929 // - More flexible constraints
25930 // - More flexible transformations
25931 // - May utilise architecture-dependent API more effectively
25932 // - Currently only one replacement instruction due to adlc parsing capabilities
25933 //
25934 // // Change (inc mov) to lea
25935 // peephole %{
25936 // // lea should only be emitted when beneficial
25937 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25938 // // the rule numbers of these nodes inside are passed into the function below
25939 // peepmatch ( incI_rReg movI );
25940 // // the method that takes the responsibility of transformation
25941 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
// // this node is passed into the function above
25944 // peepreplace ( leaI_rReg_immI() );
25945 // %}
25946
// These instructions are not matched by the matcher but are used by the peephole rules
25948 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25949 %{
25950 predicate(false);
25951 match(Set dst (AddI src1 src2));
25952 format %{ "leal $dst, [$src1 + $src2]" %}
25953 ins_encode %{
25954 Register dst = $dst$$Register;
25955 Register src1 = $src1$$Register;
25956 Register src2 = $src2$$Register;
25957 if (src1 != rbp && src1 != r13) {
25958 __ leal(dst, Address(src1, src2, Address::times_1));
25959 } else {
25960 assert(src2 != rbp && src2 != r13, "");
25961 __ leal(dst, Address(src2, src1, Address::times_1));
25962 }
25963 %}
25964 ins_pipe(ialu_reg_reg);
25965 %}
25966
25967 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25968 %{
25969 predicate(false);
25970 match(Set dst (AddI src1 src2));
25971 format %{ "leal $dst, [$src1 + $src2]" %}
25972 ins_encode %{
25973 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25974 %}
25975 ins_pipe(ialu_reg_reg);
25976 %}
25977
25978 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25979 %{
25980 predicate(false);
25981 match(Set dst (LShiftI src shift));
25982 format %{ "leal $dst, [$src << $shift]" %}
25983 ins_encode %{
25984 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25985 Register src = $src$$Register;
25986 if (scale == Address::times_2 && src != rbp && src != r13) {
25987 __ leal($dst$$Register, Address(src, src, Address::times_1));
25988 } else {
25989 __ leal($dst$$Register, Address(noreg, src, scale));
25990 }
25991 %}
25992 ins_pipe(ialu_reg_reg);
25993 %}
25994
25995 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25996 %{
25997 predicate(false);
25998 match(Set dst (AddL src1 src2));
25999 format %{ "leaq $dst, [$src1 + $src2]" %}
26000 ins_encode %{
26001 Register dst = $dst$$Register;
26002 Register src1 = $src1$$Register;
26003 Register src2 = $src2$$Register;
26004 if (src1 != rbp && src1 != r13) {
26005 __ leaq(dst, Address(src1, src2, Address::times_1));
26006 } else {
26007 assert(src2 != rbp && src2 != r13, "");
26008 __ leaq(dst, Address(src2, src1, Address::times_1));
26009 }
26010 %}
26011 ins_pipe(ialu_reg_reg);
26012 %}
26013
26014 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
26015 %{
26016 predicate(false);
26017 match(Set dst (AddL src1 src2));
26018 format %{ "leaq $dst, [$src1 + $src2]" %}
26019 ins_encode %{
26020 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
26021 %}
26022 ins_pipe(ialu_reg_reg);
26023 %}
26024
26025 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
26026 %{
26027 predicate(false);
26028 match(Set dst (LShiftL src shift));
26029 format %{ "leaq $dst, [$src << $shift]" %}
26030 ins_encode %{
26031 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
26032 Register src = $src$$Register;
26033 if (scale == Address::times_2 && src != rbp && src != r13) {
26034 __ leaq($dst$$Register, Address(src, src, Address::times_1));
26035 } else {
26036 __ leaq($dst$$Register, Address(noreg, src, scale));
26037 }
26038 %}
26039 ins_pipe(ialu_reg_reg);
26040 %}
26041
26042 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
26043 // sal}) with lea instructions. The {add, sal} rules are beneficial in
26044 // processors with at least partial ALU support for lea
26045 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
26046 // beneficial for processors with full ALU support
26047 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
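//
// As an illustrative sketch (register choices are arbitrary, not taken from the
// rules above), lea_coalesce_reg turns an adjacent move/add pair in the same
// basic block such as
//   movl edx, eax        // movI
//   addl edx, ecx        // addI_rReg
// into a single
//   leal edx, [eax + ecx]
// so the addition no longer depends on the preceding copy.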
26048
26049 peephole
26050 %{
26051 peeppredicate(VM_Version::supports_fast_2op_lea());
26052 peepmatch (addI_rReg);
26053 peepprocedure (lea_coalesce_reg);
26054 peepreplace (leaI_rReg_rReg_peep());
26055 %}
26056
26057 peephole
26058 %{
26059 peeppredicate(VM_Version::supports_fast_2op_lea());
26060 peepmatch (addI_rReg_imm);
26061 peepprocedure (lea_coalesce_imm);
26062 peepreplace (leaI_rReg_immI_peep());
26063 %}
26064
26065 peephole
26066 %{
26067 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26068 VM_Version::is_intel_cascade_lake());
26069 peepmatch (incI_rReg);
26070 peepprocedure (lea_coalesce_imm);
26071 peepreplace (leaI_rReg_immI_peep());
26072 %}
26073
26074 peephole
26075 %{
26076 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26077 VM_Version::is_intel_cascade_lake());
26078 peepmatch (decI_rReg);
26079 peepprocedure (lea_coalesce_imm);
26080 peepreplace (leaI_rReg_immI_peep());
26081 %}
26082
26083 peephole
26084 %{
26085 peeppredicate(VM_Version::supports_fast_2op_lea());
26086 peepmatch (salI_rReg_immI2);
26087 peepprocedure (lea_coalesce_imm);
26088 peepreplace (leaI_rReg_immI2_peep());
26089 %}
26090
26091 peephole
26092 %{
26093 peeppredicate(VM_Version::supports_fast_2op_lea());
26094 peepmatch (addL_rReg);
26095 peepprocedure (lea_coalesce_reg);
26096 peepreplace (leaL_rReg_rReg_peep());
26097 %}
26098
26099 peephole
26100 %{
26101 peeppredicate(VM_Version::supports_fast_2op_lea());
26102 peepmatch (addL_rReg_imm);
26103 peepprocedure (lea_coalesce_imm);
26104 peepreplace (leaL_rReg_immL32_peep());
26105 %}
26106
26107 peephole
26108 %{
26109 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26110 VM_Version::is_intel_cascade_lake());
26111 peepmatch (incL_rReg);
26112 peepprocedure (lea_coalesce_imm);
26113 peepreplace (leaL_rReg_immL32_peep());
26114 %}
26115
26116 peephole
26117 %{
26118 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26119 VM_Version::is_intel_cascade_lake());
26120 peepmatch (decL_rReg);
26121 peepprocedure (lea_coalesce_imm);
26122 peepreplace (leaL_rReg_immL32_peep());
26123 %}
26124
26125 peephole
26126 %{
26127 peeppredicate(VM_Version::supports_fast_2op_lea());
26128 peepmatch (salL_rReg_immI2);
26129 peepprocedure (lea_coalesce_imm);
26130 peepreplace (leaL_rReg_immI2_peep());
26131 %}
26132
26133 peephole
26134 %{
26135 peepmatch (leaPCompressedOopOffset);
26136 peepprocedure (lea_remove_redundant);
26137 %}
26138
26139 peephole
26140 %{
26141 peepmatch (leaP8Narrow);
26142 peepprocedure (lea_remove_redundant);
26143 %}
26144
26145 peephole
26146 %{
26147 peepmatch (leaP32Narrow);
26148 peepprocedure (lea_remove_redundant);
26149 %}
26150
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use
// flags that are already set by the previous instruction.
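//
// As an illustrative sketch (registers and labels are arbitrary), test_may_remove
// can drop the test in a sequence such as
//   andl  eax, ecx       // already sets ZF/SF from the result
//   testl eax, eax       // testI_reg, recomputes the same ZF/SF
//   je    L_done         // consumes only ZF
// because the flags consumed by the jump are already produced by the andl.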
26153
26154 //int variant
26155 peephole
26156 %{
26157 peepmatch (testI_reg);
26158 peepprocedure (test_may_remove);
26159 %}
26160
26161 //long variant
26162 peephole
26163 %{
26164 peepmatch (testL_reg);
26165 peepprocedure (test_may_remove);
26166 %}
26167
26168
26169 //----------SMARTSPILL RULES---------------------------------------------------
26170 // These must follow all instruction definitions as they use the names
26171 // defined in the instructions definitions.