//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name (register save type, C convention save type,
//                 ideal register type, encoding);
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
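//
// For example, the first definition below,
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// declares RAX as save-on-call under both the allocator's view and the C
// calling convention, spilled as an int, with hardware encoding 0.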

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// RBX, RSI, and RDI were previously set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are SOE registers again.

reg_def RAX  (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prologue/epilogue code.
reg_def RBP  (NS,  SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS,  SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

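// The chunk0 ordering below is one application of this heuristic: freely
// usable scratch registers such as R10 and R11 are offered first, while
// registers with fixed roles (RBP, R15, and especially RSP) come last.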
alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers.  512-bit registers, 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used by SSE4.2 intrinsics, array copy stubs
// and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   no XMM registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              XMM0-XMM3 might hold parameters
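//
// For example, XMM0 by itself holds a Float, XMM0 and XMM0b together hold a
// Double, and the sixteen slices XMM0..XMM0p cover one full 512-bit register.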

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
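// Note that k0 is not listed: in EVEX encodings a mask-field value of zero
// means "unmasked", so k0 cannot be allocated as a general write mask.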
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
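// The classes defined below take one of two forms: a plain enumeration of
// the registers the class contains, or a %{ ... %} body returning a register
// mask that is computed at runtime, so membership can depend on the CPU
// features detected at startup.
//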

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
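// Note: reg_class_dynamic selects between the two underlying classes at
// runtime: the first class is used when the trailing predicate holds, the
// second otherwise.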
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Class for pre-EVEX double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
// Class for EVEX double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre-EVEX 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
// Class for EVEX 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre-EVEX 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for EVEX 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre-EVEX 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for EVEX 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre-EVEX 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for EVEX 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
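// Returns true when both bounds of the CastLL's long type fit in a signed
// 32-bit immediate (an unbounded end, min_jlong/max_jlong, is accepted as
// well), so the range check can be done with cmpq against imm32 operands.
// For example, a CastLL typed [0, 4096] qualifies; one typed [0, 1L << 40]
// does not.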
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
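// With compressed oops, R12 holds the heap base used for oop decoding and
// must therefore be kept out of the allocatable register set.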
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
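// The Intel APX extended GPRs (r16-r31); they are only allocatable when
// UseAPX is enabled and are stripped from the masks below otherwise.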
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
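// Decide whether vzeroupper is needed: it is emitted when the compiled code
// may have used wide (more than 128-bit) vectors, to avoid the AVX <-> SSE
// transition penalty in subsequent legacy-SSE code.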
1626 static bool generate_vzeroupper(Compile* C) {
return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
1628 }
1629
1630 static int clear_avx_size() {
return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
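// call rel32: a 1-byte opcode (0xE8) plus a 4-byte displacement.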
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
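// Typically a 10-byte movq of the 64-bit inline-cache value into rax
// followed by a 5-byte call rel32 (a sketch of the expected encoding).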
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1683 }
1684
1685 // This could be in MacroAssembler but it's fairly C2 specific
1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1687 Label exit;
1688 __ jccb(Assembler::noParity, exit);
1689 __ pushf();
1690 //
1691 // comiss/ucomiss instructions set ZF,PF,CF flags and
1692 // zero OF,AF,SF for NaN values.
1693 // Fixup flags by zeroing ZF,PF so that compare of NaN
1694 // values returns 'less than' result (CF is set).
1695 // Leave the rest of flags unchanged.
1696 //
1697 // 7 6 5 4 3 2 1 0
1698 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1699 // 0 0 1 0 1 0 1 1 (0x2B)
1700 //
1701 __ andq(Address(rsp, 0), 0xffffff2b);
1702 __ popf();
1703 __ bind(exit);
1704 }
1705
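// Materializes a three-way compare result from flags set by ucomis[s/d]:
// dst = -1 if less-than or unordered (NaN), 0 if equal, 1 if greater.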
1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1707 Label done;
1708 __ movl(dst, -1);
1709 __ jcc(Assembler::parity, done);
1710 __ jcc(Assembler::below, done);
1711 __ setcc(Assembler::notEqual, dst);
1712 __ bind(done);
1713 }
1714
1715 // Math.min() # Math.max()
1716 // --------------------------
1717 // ucomis[s/d] #
1718 // ja -> b # a
1719 // jp -> NaN # NaN
1720 // jb -> a # b
1721 // je #
1722 // |-jz -> a | b # a & b
1723 // | -> a #
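// Note: Java semantics require min(-0.0, +0.0) == -0.0 and
// max(-0.0, +0.0) == +0.0, hence the sign-bit merge below when the inputs
// compare equal to zero: vpor for min (a set sign bit wins) and vpand for
// max (the sign bit survives only if both inputs are negative). Unordered
// inputs produce the canonical NaN constant instead.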
1724 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1725 XMMRegister a, XMMRegister b,
1726 XMMRegister xmmt, Register rt,
1727 bool min, bool single) {
1728
1729 Label nan, zero, below, above, done;
1730
1731 if (single)
1732 __ ucomiss(a, b);
1733 else
1734 __ ucomisd(a, b);
1735
1736 if (dst->encoding() != (min ? b : a)->encoding())
1737 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1738 else
1739 __ jccb(Assembler::above, done);
1740
1741 __ jccb(Assembler::parity, nan); // PF=1
1742 __ jccb(Assembler::below, below); // CF=1
1743
1744 // equal
1745 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1746 if (single) {
1747 __ ucomiss(a, xmmt);
1748 __ jccb(Assembler::equal, zero);
1749
1750 __ movflt(dst, a);
1751 __ jmp(done);
1752 }
1753 else {
1754 __ ucomisd(a, xmmt);
1755 __ jccb(Assembler::equal, zero);
1756
1757 __ movdbl(dst, a);
1758 __ jmp(done);
1759 }
1760
1761 __ bind(zero);
1762 if (min)
1763 __ vpor(dst, a, b, Assembler::AVX_128bit);
1764 else
1765 __ vpand(dst, a, b, Assembler::AVX_128bit);
1766
1767 __ jmp(done);
1768
1769 __ bind(above);
1770 if (single)
1771 __ movflt(dst, min ? b : a);
1772 else
1773 __ movdbl(dst, min ? b : a);
1774
1775 __ jmp(done);
1776
1777 __ bind(nan);
1778 if (single) {
1779 __ movl(rt, 0x7fc00000); // Float.NaN
1780 __ movdl(dst, rt);
1781 }
1782 else {
1783 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1784 __ movdq(dst, rt);
1785 }
1786 __ jmp(done);
1787
1788 __ bind(below);
1789 if (single)
1790 __ movflt(dst, min ? a : b);
1791 else
1792 __ movdbl(dst, min ? a : b);
1793
1794 __ bind(done);
1795 }
1796
1797 //=============================================================================
1798 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1799
1800 int ConstantTable::calculate_table_base_offset() const {
1801 return 0; // absolute addressing, no offset
1802 }
1803
1804 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1805 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1806 ShouldNotReachHere();
1807 }
1808
1809 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1810 // Empty encoding
1811 }
1812
1813 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1814 return 0;
1815 }
1816
1817 #ifndef PRODUCT
1818 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1819 st->print("# MachConstantBaseNode (empty encoding)");
1820 }
1821 #endif
1822
1823
1824 //=============================================================================
1825 #ifndef PRODUCT
1826 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1827 Compile* C = ra_->C;
1828
1829 int framesize = C->output()->frame_size_in_bytes();
1830 int bangsize = C->output()->bang_size_in_bytes();
1831 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1832 // Remove wordSize for return addr which is already pushed.
1833 framesize -= wordSize;
1834
1835 if (C->output()->need_stack_bang(bangsize)) {
1836 framesize -= wordSize;
1837 st->print("# stack bang (%d bytes)", bangsize);
1838 st->print("\n\t");
1839 st->print("pushq rbp\t# Save rbp");
1840 if (PreserveFramePointer) {
1841 st->print("\n\t");
1842 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1843 }
1844 if (framesize) {
1845 st->print("\n\t");
1846 st->print("subq rsp, #%d\t# Create frame",framesize);
1847 }
1848 } else {
1849 st->print("subq rsp, #%d\t# Create frame",framesize);
1850 st->print("\n\t");
1851 framesize -= wordSize;
1852 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1853 if (PreserveFramePointer) {
1854 st->print("\n\t");
1855 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1856 if (framesize > 0) {
1857 st->print("\n\t");
1858 st->print("addq rbp, #%d", framesize);
1859 }
1860 }
1861 }
1862
1863 if (VerifyStackAtCalls) {
1864 st->print("\n\t");
1865 framesize -= wordSize;
1866 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1867 #ifdef ASSERT
1868 st->print("\n\t");
1869 st->print("# stack alignment check");
1870 #endif
1871 }
1872 if (C->stub_function() != nullptr) {
1873 st->print("\n\t");
1874 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1875 st->print("\n\t");
1876 st->print("je fast_entry\t");
1877 st->print("\n\t");
1878 st->print("call #nmethod_entry_barrier_stub\t");
1879 st->print("\n\tfast_entry:");
1880 }
1881 st->cr();
1882 }
1883 #endif
1884
1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1886 Compile* C = ra_->C;
1887
1888 __ verified_entry(C);
1889
1890 if (ra_->C->stub_function() == nullptr) {
1891 __ entry_barrier();
1892 }
1893
1894 if (!Compile::current()->output()->in_scratch_emit_size()) {
1895 __ bind(*_verified_entry);
1896 }
1897
1898 C->output()->set_frame_complete(__ offset());
1899
1900 if (C->has_mach_constant_base_node()) {
1901 // NOTE: We set the table base offset here because users might be
1902 // emitted before MachConstantBaseNode.
1903 ConstantTable& constant_table = C->output()->constant_table();
1904 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1905 }
1906 }
1907
1908
1909 int MachPrologNode::reloc() const
1910 {
1911 return 0; // a large enough number
1912 }
1913
1914 //=============================================================================
1915 #ifndef PRODUCT
1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1917 {
1918 Compile* C = ra_->C;
1919 if (generate_vzeroupper(C)) {
1920 st->print("vzeroupper");
1921 st->cr(); st->print("\t");
1922 }
1923
1924 int framesize = C->output()->frame_size_in_bytes();
1925 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove a word for the return address, which is already pushed, and another for RBP.
1928 framesize -= 2*wordSize;
1929
1930 if (framesize) {
1931 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1932 st->print("\t");
1933 }
1934
1935 st->print_cr("popq rbp");
1936 if (do_polling() && C->is_method_compilation()) {
1937 st->print("\t");
1938 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1939 "ja #safepoint_stub\t"
1940 "# Safepoint: poll for GC");
1941 }
1942 }
1943 #endif
1944
1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1946 {
1947 Compile* C = ra_->C;
1948
1949 if (generate_vzeroupper(C)) {
1950 // Clear upper bits of YMM registers when current compiled code uses
1951 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1952 __ vzeroupper();
1953 }
1954
1955 // Subtract two words to account for return address and rbp
1956 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1957 __ remove_frame(initial_framesize, C->needs_stack_repair());
1958
1959 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1960 __ reserved_stack_check();
1961 }
1962
1963 if (do_polling() && C->is_method_compilation()) {
1964 Label dummy_label;
1965 Label* code_stub = &dummy_label;
1966 if (!C->output()->in_scratch_emit_size()) {
1967 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1968 C->output()->add_stub(stub);
1969 code_stub = &stub->entry();
1970 }
1971 __ relocate(relocInfo::poll_return_type);
1972 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1973 }
1974 }
1975
1976 int MachEpilogNode::reloc() const
1977 {
1978 return 2; // a large enough number
1979 }
1980
1981 const Pipeline* MachEpilogNode::pipeline() const
1982 {
1983 return MachNode::pipeline_class();
1984 }
1985
1986 //=============================================================================
1987
1988 enum RC {
1989 rc_bad,
1990 rc_int,
1991 rc_kreg,
1992 rc_float,
1993 rc_stack
1994 };
1995
1996 static enum RC rc_class(OptoReg::Name reg)
1997 {
if (!OptoReg::is_valid(reg)) return rc_bad;
1999
2000 if (OptoReg::is_stack(reg)) return rc_stack;
2001
2002 VMReg r = OptoReg::as_VMReg(reg);
2003
2004 if (r->is_Register()) return rc_int;
2005
2006 if (r->is_KRegister()) return rc_kreg;
2007
2008 assert(r->is_XMMRegister(), "must be");
2009 return rc_float;
2010 }
2011
// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
2013 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2014 int src_hi, int dst_hi, uint ireg, outputStream* st);
2015
2016 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2017 int stack_offset, int reg, uint ireg, outputStream* st);
2018
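// Moves a vector between two stack slots. The scratch registers (rax or
// xmm0) are preserved in the unused area just below rsp for the duration
// of the copy, so no register is clobbered.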
2019 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2020 int dst_offset, uint ireg, outputStream* st) {
2021 if (masm) {
2022 switch (ireg) {
2023 case Op_VecS:
2024 __ movq(Address(rsp, -8), rax);
2025 __ movl(rax, Address(rsp, src_offset));
2026 __ movl(Address(rsp, dst_offset), rax);
2027 __ movq(rax, Address(rsp, -8));
2028 break;
2029 case Op_VecD:
2030 __ pushq(Address(rsp, src_offset));
2031 __ popq (Address(rsp, dst_offset));
2032 break;
2033 case Op_VecX:
2034 __ pushq(Address(rsp, src_offset));
2035 __ popq (Address(rsp, dst_offset));
2036 __ pushq(Address(rsp, src_offset+8));
2037 __ popq (Address(rsp, dst_offset+8));
2038 break;
2039 case Op_VecY:
2040 __ vmovdqu(Address(rsp, -32), xmm0);
2041 __ vmovdqu(xmm0, Address(rsp, src_offset));
2042 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2043 __ vmovdqu(xmm0, Address(rsp, -32));
2044 break;
2045 case Op_VecZ:
2046 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2047 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2048 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2049 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2050 break;
2051 default:
2052 ShouldNotReachHere();
2053 }
2054 #ifndef PRODUCT
2055 } else {
2056 switch (ireg) {
2057 case Op_VecS:
2058 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2059 "movl rax, [rsp + #%d]\n\t"
2060 "movl [rsp + #%d], rax\n\t"
2061 "movq rax, [rsp - #8]",
2062 src_offset, dst_offset);
2063 break;
2064 case Op_VecD:
2065 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2066 "popq [rsp + #%d]",
2067 src_offset, dst_offset);
2068 break;
2069 case Op_VecX:
2070 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2071 "popq [rsp + #%d]\n\t"
2072 "pushq [rsp + #%d]\n\t"
2073 "popq [rsp + #%d]",
2074 src_offset, dst_offset, src_offset+8, dst_offset+8);
2075 break;
2076 case Op_VecY:
2077 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2078 "vmovdqu xmm0, [rsp + #%d]\n\t"
2079 "vmovdqu [rsp + #%d], xmm0\n\t"
2080 "vmovdqu xmm0, [rsp - #32]",
2081 src_offset, dst_offset);
2082 break;
2083 case Op_VecZ:
2084 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2085 "vmovdqu xmm0, [rsp + #%d]\n\t"
2086 "vmovdqu [rsp + #%d], xmm0\n\t"
2087 "vmovdqu xmm0, [rsp - #64]",
2088 src_offset, dst_offset);
2089 break;
2090 default:
2091 ShouldNotReachHere();
2092 }
2093 #endif
2094 }
2095 }
2096
2097 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2098 PhaseRegAlloc* ra_,
2099 bool do_size,
2100 outputStream* st) const {
2101 assert(masm != nullptr || st != nullptr, "sanity");
2102 // Get registers to move
2103 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2104 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2105 OptoReg::Name dst_second = ra_->get_reg_second(this);
2106 OptoReg::Name dst_first = ra_->get_reg_first(this);
2107
2108 enum RC src_second_rc = rc_class(src_second);
2109 enum RC src_first_rc = rc_class(src_first);
2110 enum RC dst_second_rc = rc_class(dst_second);
2111 enum RC dst_first_rc = rc_class(dst_first);
2112
2113 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2114 "must move at least 1 register" );
2115
2116 if (src_first == dst_first && src_second == dst_second) {
2117 // Self copy, no move
2118 return 0;
2119 }
2120 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2121 uint ireg = ideal_reg();
2122 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2123 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2125 // mem -> mem
2126 int src_offset = ra_->reg2offset(src_first);
2127 int dst_offset = ra_->reg2offset(dst_first);
2128 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2129 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2130 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2131 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2132 int stack_offset = ra_->reg2offset(dst_first);
2133 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2134 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2135 int stack_offset = ra_->reg2offset(src_first);
2136 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2137 } else {
2138 ShouldNotReachHere();
2139 }
2140 return 0;
2141 }
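// In the scalar cases below, '(reg & 1) == 0 && reg + 1 == reg_second'
// tests for an even-aligned adjacent OptoReg pair, i.e. a live 64-bit
// value; otherwise only the low 32 bits are moved.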
2142 if (src_first_rc == rc_stack) {
2143 // mem ->
2144 if (dst_first_rc == rc_stack) {
2145 // mem -> mem
2146 assert(src_second != dst_first, "overlap");
2147 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2148 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2149 // 64-bit
2150 int src_offset = ra_->reg2offset(src_first);
2151 int dst_offset = ra_->reg2offset(dst_first);
2152 if (masm) {
2153 __ pushq(Address(rsp, src_offset));
2154 __ popq (Address(rsp, dst_offset));
2155 #ifndef PRODUCT
2156 } else {
2157 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2158 "popq [rsp + #%d]",
2159 src_offset, dst_offset);
2160 #endif
2161 }
2162 } else {
2163 // 32-bit
2164 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2165 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2166 // No pushl/popl, so:
2167 int src_offset = ra_->reg2offset(src_first);
2168 int dst_offset = ra_->reg2offset(dst_first);
2169 if (masm) {
2170 __ movq(Address(rsp, -8), rax);
2171 __ movl(rax, Address(rsp, src_offset));
2172 __ movl(Address(rsp, dst_offset), rax);
2173 __ movq(rax, Address(rsp, -8));
2174 #ifndef PRODUCT
2175 } else {
2176 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2177 "movl rax, [rsp + #%d]\n\t"
2178 "movl [rsp + #%d], rax\n\t"
2179 "movq rax, [rsp - #8]",
2180 src_offset, dst_offset);
2181 #endif
2182 }
2183 }
2184 return 0;
2185 } else if (dst_first_rc == rc_int) {
2186 // mem -> gpr
2187 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2188 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2189 // 64-bit
2190 int offset = ra_->reg2offset(src_first);
2191 if (masm) {
2192 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2193 #ifndef PRODUCT
2194 } else {
2195 st->print("movq %s, [rsp + #%d]\t# spill",
2196 Matcher::regName[dst_first],
2197 offset);
2198 #endif
2199 }
2200 } else {
2201 // 32-bit
2202 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2203 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2204 int offset = ra_->reg2offset(src_first);
2205 if (masm) {
2206 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2207 #ifndef PRODUCT
2208 } else {
2209 st->print("movl %s, [rsp + #%d]\t# spill",
2210 Matcher::regName[dst_first],
2211 offset);
2212 #endif
2213 }
2214 }
2215 return 0;
2216 } else if (dst_first_rc == rc_float) {
// mem -> xmm
2218 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2219 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2220 // 64-bit
2221 int offset = ra_->reg2offset(src_first);
2222 if (masm) {
2223 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2224 #ifndef PRODUCT
2225 } else {
2226 st->print("%s %s, [rsp + #%d]\t# spill",
2227 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2228 Matcher::regName[dst_first],
2229 offset);
2230 #endif
2231 }
2232 } else {
2233 // 32-bit
2234 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2235 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2236 int offset = ra_->reg2offset(src_first);
2237 if (masm) {
2238 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2239 #ifndef PRODUCT
2240 } else {
2241 st->print("movss %s, [rsp + #%d]\t# spill",
2242 Matcher::regName[dst_first],
2243 offset);
2244 #endif
2245 }
2246 }
2247 return 0;
2248 } else if (dst_first_rc == rc_kreg) {
2249 // mem -> kreg
2250 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2251 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2252 // 64-bit
2253 int offset = ra_->reg2offset(src_first);
2254 if (masm) {
2255 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2256 #ifndef PRODUCT
2257 } else {
2258 st->print("kmovq %s, [rsp + #%d]\t# spill",
2259 Matcher::regName[dst_first],
2260 offset);
2261 #endif
2262 }
2263 }
2264 return 0;
2265 }
2266 } else if (src_first_rc == rc_int) {
2267 // gpr ->
2268 if (dst_first_rc == rc_stack) {
2269 // gpr -> mem
2270 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2271 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2272 // 64-bit
2273 int offset = ra_->reg2offset(dst_first);
2274 if (masm) {
2275 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2276 #ifndef PRODUCT
2277 } else {
2278 st->print("movq [rsp + #%d], %s\t# spill",
2279 offset,
2280 Matcher::regName[src_first]);
2281 #endif
2282 }
2283 } else {
2284 // 32-bit
2285 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2286 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2287 int offset = ra_->reg2offset(dst_first);
2288 if (masm) {
2289 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2290 #ifndef PRODUCT
2291 } else {
2292 st->print("movl [rsp + #%d], %s\t# spill",
2293 offset,
2294 Matcher::regName[src_first]);
2295 #endif
2296 }
2297 }
2298 return 0;
2299 } else if (dst_first_rc == rc_int) {
2300 // gpr -> gpr
2301 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2302 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2303 // 64-bit
2304 if (masm) {
2305 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2306 as_Register(Matcher::_regEncode[src_first]));
2307 #ifndef PRODUCT
2308 } else {
2309 st->print("movq %s, %s\t# spill",
2310 Matcher::regName[dst_first],
2311 Matcher::regName[src_first]);
2312 #endif
2313 }
2314 return 0;
2315 } else {
2316 // 32-bit
2317 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2318 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2319 if (masm) {
2320 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2321 as_Register(Matcher::_regEncode[src_first]));
2322 #ifndef PRODUCT
2323 } else {
2324 st->print("movl %s, %s\t# spill",
2325 Matcher::regName[dst_first],
2326 Matcher::regName[src_first]);
2327 #endif
2328 }
2329 return 0;
2330 }
2331 } else if (dst_first_rc == rc_float) {
2332 // gpr -> xmm
2333 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2334 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2335 // 64-bit
2336 if (masm) {
2337 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2338 #ifndef PRODUCT
2339 } else {
2340 st->print("movdq %s, %s\t# spill",
2341 Matcher::regName[dst_first],
2342 Matcher::regName[src_first]);
2343 #endif
2344 }
2345 } else {
2346 // 32-bit
2347 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2348 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2349 if (masm) {
2350 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2351 #ifndef PRODUCT
2352 } else {
2353 st->print("movdl %s, %s\t# spill",
2354 Matcher::regName[dst_first],
2355 Matcher::regName[src_first]);
2356 #endif
2357 }
2358 }
2359 return 0;
2360 } else if (dst_first_rc == rc_kreg) {
2361 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2362 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2363 // 64-bit
2364 if (masm) {
2365 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2366 #ifndef PRODUCT
2367 } else {
2368 st->print("kmovq %s, %s\t# spill",
2369 Matcher::regName[dst_first],
2370 Matcher::regName[src_first]);
2371 #endif
2372 }
2373 }
2374 Unimplemented();
2375 return 0;
2376 }
2377 } else if (src_first_rc == rc_float) {
2378 // xmm ->
2379 if (dst_first_rc == rc_stack) {
2380 // xmm -> mem
2381 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2382 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2383 // 64-bit
2384 int offset = ra_->reg2offset(dst_first);
2385 if (masm) {
2386 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2387 #ifndef PRODUCT
2388 } else {
2389 st->print("movsd [rsp + #%d], %s\t# spill",
2390 offset,
2391 Matcher::regName[src_first]);
2392 #endif
2393 }
2394 } else {
2395 // 32-bit
2396 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2397 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2398 int offset = ra_->reg2offset(dst_first);
2399 if (masm) {
2400 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2401 #ifndef PRODUCT
2402 } else {
2403 st->print("movss [rsp + #%d], %s\t# spill",
2404 offset,
2405 Matcher::regName[src_first]);
2406 #endif
2407 }
2408 }
2409 return 0;
2410 } else if (dst_first_rc == rc_int) {
2411 // xmm -> gpr
2412 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2413 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2414 // 64-bit
2415 if (masm) {
2416 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2417 #ifndef PRODUCT
2418 } else {
2419 st->print("movdq %s, %s\t# spill",
2420 Matcher::regName[dst_first],
2421 Matcher::regName[src_first]);
2422 #endif
2423 }
2424 } else {
2425 // 32-bit
2426 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2427 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2428 if (masm) {
2429 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2430 #ifndef PRODUCT
2431 } else {
2432 st->print("movdl %s, %s\t# spill",
2433 Matcher::regName[dst_first],
2434 Matcher::regName[src_first]);
2435 #endif
2436 }
2437 }
2438 return 0;
2439 } else if (dst_first_rc == rc_float) {
2440 // xmm -> xmm
2441 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2442 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2443 // 64-bit
2444 if (masm) {
2445 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2446 #ifndef PRODUCT
2447 } else {
2448 st->print("%s %s, %s\t# spill",
2449 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2450 Matcher::regName[dst_first],
2451 Matcher::regName[src_first]);
2452 #endif
2453 }
2454 } else {
2455 // 32-bit
2456 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2457 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2458 if (masm) {
2459 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2460 #ifndef PRODUCT
2461 } else {
2462 st->print("%s %s, %s\t# spill",
2463 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2464 Matcher::regName[dst_first],
2465 Matcher::regName[src_first]);
2466 #endif
2467 }
2468 }
2469 return 0;
2470 } else if (dst_first_rc == rc_kreg) {
2471 assert(false, "Illegal spilling");
2472 return 0;
2473 }
2474 } else if (src_first_rc == rc_kreg) {
2475 if (dst_first_rc == rc_stack) {
// kreg -> mem
2477 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2478 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2479 // 64-bit
2480 int offset = ra_->reg2offset(dst_first);
2481 if (masm) {
2482 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2483 #ifndef PRODUCT
2484 } else {
2485 st->print("kmovq [rsp + #%d] , %s\t# spill",
2486 offset,
2487 Matcher::regName[src_first]);
2488 #endif
2489 }
2490 }
2491 return 0;
2492 } else if (dst_first_rc == rc_int) {
2493 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2494 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2495 // 64-bit
2496 if (masm) {
2497 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2498 #ifndef PRODUCT
2499 } else {
2500 st->print("kmovq %s, %s\t# spill",
2501 Matcher::regName[dst_first],
2502 Matcher::regName[src_first]);
2503 #endif
2504 }
2505 }
2506 Unimplemented();
2507 return 0;
2508 } else if (dst_first_rc == rc_kreg) {
2509 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2510 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2511 // 64-bit
2512 if (masm) {
2513 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2514 #ifndef PRODUCT
2515 } else {
2516 st->print("kmovq %s, %s\t# spill",
2517 Matcher::regName[dst_first],
2518 Matcher::regName[src_first]);
2519 #endif
2520 }
2521 }
2522 return 0;
2523 } else if (dst_first_rc == rc_float) {
2524 assert(false, "Illegal spill");
2525 return 0;
2526 }
2527 }
2528
assert(false, "unhandled register class combination in spill copy");
2530 Unimplemented();
2531 return 0;
2532 }
2533
2534 #ifndef PRODUCT
2535 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2536 implementation(nullptr, ra_, false, st);
2537 }
2538 #endif
2539
2540 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2541 implementation(masm, ra_, false, nullptr);
2542 }
2543
2544 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2545 return MachNode::size(ra_);
2546 }
2547
2548 //=============================================================================
2549 #ifndef PRODUCT
2550 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2551 {
2552 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2553 int reg = ra_->get_reg_first(this);
2554 st->print("leaq %s, [rsp + #%d]\t# box lock",
2555 Matcher::regName[reg], offset);
2556 }
2557 #endif
2558
2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2560 {
2561 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2562 int reg = ra_->get_encode(this);
2563
2564 __ lea(as_Register(reg), Address(rsp, offset));
2565 }
2566
2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2568 {
2569 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
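// lea reg, [rsp + disp] with a SIB byte is 5 bytes for disp8 and 8 for
// disp32; encodings of r16-r31 need the 2-byte REX2 prefix, one byte more
// than classic REX.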
2570 if (ra_->get_encode(this) > 15) {
2571 return (offset < 0x80) ? 6 : 9; // REX2
2572 } else {
2573 return (offset < 0x80) ? 5 : 8; // REX
2574 }
2575 }
2576
2577 //=============================================================================
2578 #ifndef PRODUCT
2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2580 {
2581 st->print_cr("MachVEPNode");
2582 }
2583 #endif
2584
2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 CodeBuffer* cbuf = masm->code();
2588 uint insts_size = cbuf->insts_size();
2589 if (!_verified) {
2590 __ ic_check(1);
2591 } else {
2592 // TODO 8284443 Avoid creation of temporary frame
2593 if (ra_->C->stub_function() == nullptr) {
2594 __ verified_entry(ra_->C, 0);
2595 __ entry_barrier();
2596 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2597 __ remove_frame(initial_framesize, false);
2598 }
2599 // Unpack inline type args passed as oop and then jump to
2600 // the verified entry point (skipping the unverified entry).
2601 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2602 // Emit code for verified entry and save increment for stack repair on return
2603 __ verified_entry(ra_->C, sp_inc);
2604 if (Compile::current()->output()->in_scratch_emit_size()) {
2605 Label dummy_verified_entry;
2606 __ jmp(dummy_verified_entry);
2607 } else {
2608 __ jmp(*_verified_entry);
2609 }
2610 }
/* WARNING: these NOPs are critical so that the verified entry point is
   properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
2613 int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
2614 nops_cnt &= 0x3; // Do not add nops if code is aligned.
2615 if (nops_cnt > 0) {
2616 __ nop(nops_cnt);
2617 }
2618 }
2619
2620 //=============================================================================
2621 #ifndef PRODUCT
2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2623 {
2624 if (UseCompressedClassPointers) {
2625 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2626 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2627 } else {
2628 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2629 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2630 }
2631 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2632 }
2633 #endif
2634
2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2636 {
2637 __ ic_check(InteriorEntryAlignment);
2638 }
2639
2640
2641 //=============================================================================
2642
2643 bool Matcher::supports_vector_calling_convention(void) {
2644 return EnableVectorSupport;
2645 }
2646
2647 static bool is_ndd_demotable(const MachNode* mdef) {
2648 return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
2649 }
2650
2651 static bool is_ndd_demotable_commutative(const MachNode* mdef) {
2652 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
2653 }
2654
2655 static bool is_demotion_candidate(const MachNode* mdef) {
2656 return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
2657 }
2658
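// Illustrative sketch of the demotion checked below (hypothetical register
// choices): an APX NDD instruction such as 'addl r8d, r9d, r10d' needs a
// 4-byte extended EVEX prefix; if the allocator biases the definition (r8d)
// onto the first source (r9d), the assembler can demote it to the legacy
// two-operand 'addl r8d, r10d' with a shorter REX2/REX encoding.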
2659 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2660 int oper_index) {
2661 if (mdef == nullptr) {
2662 return false;
2663 }
2664
2665 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2666 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2667 assert(oper_index != 1 || !is_demotion_candidate(mdef), "%s", mdef->Name());
2668 assert(oper_index != 2 || !is_ndd_demotable_commutative(mdef), "%s", mdef->Name());
2669 return false;
2670 }
2671
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
2675 if (mdef->operand_num_edges(oper_index) != 1) {
2676 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2677 return false;
2678 }
2679
2680 // Demotion candidate must be register mask compatible with definition.
2681 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2682 if (!oper_mask.overlap(mdef->out_RegMask())) {
2683 assert(!is_demotion_candidate(mdef), "%s", mdef->Name());
2684 return false;
2685 }
2686
2687 switch (oper_index) {
    // First operand of a MachNode corresponding to an Intel APX NDD selection
    // pattern can share its assigned register with the definition operand if
    // their live ranges do not overlap. In such a scenario we can demote the
    // instruction to a legacy map0/map1 encoding by replacing its 4-byte
    // extended EVEX prefix with a shorter REX/REX2 prefix. Demotion candidates
    // are decorated with a special flag by the instruction selector.
2694 case 1:
2695 return is_demotion_candidate(mdef);
2696
    // The definition operand of a commutative operation can be biased towards
    // the second operand.
2699 case 2:
2700 return is_ndd_demotable_commutative(mdef);
2701
    // The current scheme selects at most two biasing candidates.
2703 default:
2704 assert(false, "unhandled operand index: %s", mdef->Name());
2705 break;
2706 }
2707
2708 return false;
2709 }
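
// Illustrative effect of a successful bias (a sketch, not code emitted here):
// an APX NDD instruction such as "addl ndd_dst, src1, src2" carries a 4-byte
// extended EVEX prefix. If the allocator assigns ndd_dst and src1 the same
// register, the assembler can demote it to the legacy two-operand form
// "addl dst, src2" with a 1- or 2-byte REX/REX2 prefix, saving 2-3 bytes.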
2710
2711 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2712 assert(EnableVectorSupport, "sanity");
2713 int lo = XMM0_num;
2714 int hi = XMM0b_num;
2715 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2716 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2717 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2718 return OptoRegPair(hi, lo);
2719 }
2720
2721 // Is this branch offset short enough that a short branch can be used?
2722 //
2723 // NOTE: If the platform does not provide any short branch variants, then
2724 // this method should return false for offset 0.
2725 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2729 offset -= br_size;
2730
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller.
2733 if (rule == jmpConUCF2_rule)
2734 return (-126 <= offset && offset <= 125);
2735 return (-128 <= offset && offset <= 127);
2736 }
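
// Worked example: a 2-byte short jcc (br_size == 2) whose target lies 100
// bytes past the start of the branch arrives here with offset == 100; after
// the adjustment the displacement is 98, inside [-128, 127], so the short
// form is usable.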
2737
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
2743 bool Matcher::can_be_java_arg(int reg)
2744 {
2745 return
2746 reg == RDI_num || reg == RDI_H_num ||
2747 reg == RSI_num || reg == RSI_H_num ||
2748 reg == RDX_num || reg == RDX_H_num ||
2749 reg == RCX_num || reg == RCX_H_num ||
2750 reg == R8_num || reg == R8_H_num ||
2751 reg == R9_num || reg == R9_H_num ||
2752 reg == R12_num || reg == R12_H_num ||
2753 reg == XMM0_num || reg == XMM0b_num ||
2754 reg == XMM1_num || reg == XMM1b_num ||
2755 reg == XMM2_num || reg == XMM2b_num ||
2756 reg == XMM3_num || reg == XMM3b_num ||
2757 reg == XMM4_num || reg == XMM4b_num ||
2758 reg == XMM5_num || reg == XMM5b_num ||
2759 reg == XMM6_num || reg == XMM6b_num ||
2760 reg == XMM7_num || reg == XMM7b_num;
2761 }
2762
2763 bool Matcher::is_spillable_arg(int reg)
2764 {
2765 return can_be_java_arg(reg);
2766 }
2767
2768 uint Matcher::int_pressure_limit()
2769 {
2770 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2771 }
2772
2773 uint Matcher::float_pressure_limit()
2774 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
2777 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2778 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2779 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2780 }
2781
2782 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the divisor is constant
  // is faster than the hardware DIV instruction (it uses MulHiL).
2786 return false;
2787 }
2788
2789 // Register for DIVI projection of divmodI
2790 const RegMask& Matcher::divI_proj_mask() {
2791 return INT_RAX_REG_mask();
2792 }
2793
2794 // Register for MODI projection of divmodI
2795 const RegMask& Matcher::modI_proj_mask() {
2796 return INT_RDX_REG_mask();
2797 }
2798
2799 // Register for DIVL projection of divmodL
2800 const RegMask& Matcher::divL_proj_mask() {
2801 return LONG_RAX_REG_mask();
2802 }
2803
2804 // Register for MODL projection of divmodL
2805 const RegMask& Matcher::modL_proj_mask() {
2806 return LONG_RDX_REG_mask();
2807 }
2808
2809 %}
2810
2811 source_hpp %{
2812 // Header information of the source block.
2813 // Method declarations/definitions which are used outside
2814 // the ad-scope can conveniently be defined here.
2815 //
2816 // To keep related declarations/definitions/uses close together,
2817 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2818
2819 #include "runtime/vm_version.hpp"
2820
2821 class NativeJump;
2822
2823 class CallStubImpl {
2824
2825 //--------------------------------------------------------------
2826 //---< Used for optimization in Compile::shorten_branches >---
2827 //--------------------------------------------------------------
2828
2829 public:
2830 // Size of call trampoline stub.
2831 static uint size_call_trampoline() {
2832 return 0; // no call trampolines on this platform
2833 }
2834
2835 // number of relocations needed by a call trampoline stub
2836 static uint reloc_call_trampoline() {
2837 return 0; // no call trampolines on this platform
2838 }
2839 };
2840
2841 class HandlerImpl {
2842
2843 public:
2844
2845 static int emit_deopt_handler(C2_MacroAssembler* masm);
2846
2847 static uint size_deopt_handler() {
    // One 5-byte call and one 2-byte short jmp back to it.
2849 return 7;
2850 }
2851 };
2852
2853 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
2855 case 4: // fall-through
2856 case 8: // fall-through
2857 case 16: return Assembler::AVX_128bit;
2858 case 32: return Assembler::AVX_256bit;
2859 case 64: return Assembler::AVX_512bit;
2860
2861 default: {
2862 ShouldNotReachHere();
2863 return Assembler::AVX_NoVec;
2864 }
2865 }
2866 }
2867
2868 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2869 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2870 }
2871
2872 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2873 uint def_idx = use->operand_index(opnd);
2874 Node* def = use->in(def_idx);
2875 return vector_length_encoding(def);
2876 }
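
// Example: a 32-byte (256-bit) vector node maps to Assembler::AVX_256bit,
// while 4- and 8-byte vectors are encoded as AVX_128bit since they occupy
// XMM registers.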
2877
2878 static inline bool is_vector_popcount_predicate(BasicType bt) {
2879 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2880 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2881 }
2882
2883 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2884 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2885 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2886 }
2887
2888 class Node::PD {
2889 public:
2890 enum NodeFlags : uint64_t {
2891 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2892 Flag_sets_carry_flag = Node::_last_flag << 2,
2893 Flag_sets_parity_flag = Node::_last_flag << 3,
2894 Flag_sets_zero_flag = Node::_last_flag << 4,
2895 Flag_sets_overflow_flag = Node::_last_flag << 5,
2896 Flag_sets_sign_flag = Node::_last_flag << 6,
2897 Flag_clears_carry_flag = Node::_last_flag << 7,
2898 Flag_clears_parity_flag = Node::_last_flag << 8,
2899 Flag_clears_zero_flag = Node::_last_flag << 9,
2900 Flag_clears_overflow_flag = Node::_last_flag << 10,
2901 Flag_clears_sign_flag = Node::_last_flag << 11,
2902 Flag_ndd_demotable = Node::_last_flag << 12,
2903 Flag_ndd_demotable_commutative = Node::_last_flag << 13,
2904 _last_flag = Flag_ndd_demotable_commutative
2905 };
2906 };
2907
2908 %} // end source_hpp
2909
2910 source %{
2911
2912 #include "opto/addnode.hpp"
2913 #include "c2_intelJccErratum_x86.hpp"
2914
2915 void PhaseOutput::pd_perform_mach_node_analysis() {
2916 if (VM_Version::has_intel_jcc_erratum()) {
2917 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2918 _buf_sizes._code += extra_padding;
2919 }
2920 }
2921
2922 int MachNode::pd_alignment_required() const {
2923 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2924 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2925 return IntelJccErratum::largest_jcc_size() + 1;
2926 } else {
2927 return 1;
2928 }
2929 }
2930
2931 int MachNode::compute_padding(int current_offset) const {
2932 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2933 Compile* C = Compile::current();
2934 PhaseOutput* output = C->output();
2935 Block* block = output->block();
2936 int index = output->index();
2937 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2938 } else {
2939 return 0;
2940 }
2941 }
2942
2943 // Emit deopt handler code.
2944 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2945
2946 // Note that the code buffer's insts_mark is always relative to insts.
2947 // That's why we must use the macroassembler to generate a handler.
2948 address base = __ start_a_stub(size_deopt_handler());
2949 if (base == nullptr) {
2950 ciEnv::current()->record_failure("CodeCache is full");
2951 return 0; // CodeBuffer::expand failed
2952 }
2953 int offset = __ offset();
2954
2955 Label start;
2956 __ bind(start);
2957
2958 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2959
2960 int entry_offset = __ offset();
2961
2962 __ jmp(start);
2963
2964 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2965 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2966 "out of bounds read in post-call NOP check");
2967 __ end_a_stub();
2968 return entry_offset;
2969 }
2970
2971 static Assembler::Width widthForType(BasicType bt) {
2972 if (bt == T_BYTE) {
2973 return Assembler::B;
2974 } else if (bt == T_SHORT) {
2975 return Assembler::W;
2976 } else if (bt == T_INT) {
2977 return Assembler::D;
2978 } else {
2979 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2980 return Assembler::Q;
2981 }
2982 }
2983
2984 //=============================================================================
2985
2986 // Float masks come from different places depending on platform.
2987 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2988 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2989 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2990 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2991 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2992 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2993 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2994 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2995 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2996 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2997 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2998 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2999 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
3000 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
3001 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
3002 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
3003 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
3004 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
3005 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
3006
3007 //=============================================================================
3008 bool Matcher::match_rule_supported(int opcode) {
3009 if (!has_match_rule(opcode)) {
3010 return false; // no match rule present
3011 }
3012 switch (opcode) {
3013 case Op_AbsVL:
3014 case Op_StoreVectorScatter:
3015 if (UseAVX < 3) {
3016 return false;
3017 }
3018 break;
3019 case Op_PopCountI:
3020 case Op_PopCountL:
3021 if (!UsePopCountInstruction) {
3022 return false;
3023 }
3024 break;
3025 case Op_PopCountVI:
3026 if (UseAVX < 2) {
3027 return false;
3028 }
3029 break;
3030 case Op_CompressV:
3031 case Op_ExpandV:
3032 case Op_PopCountVL:
3033 if (UseAVX < 2) {
3034 return false;
3035 }
3036 break;
3037 case Op_MulVI:
3038 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3039 return false;
3040 }
3041 break;
3042 case Op_MulVL:
3043 if (UseSSE < 4) { // only with SSE4_1 or AVX
3044 return false;
3045 }
3046 break;
3047 case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
3049 return false;
3050 }
3051 break;
3052 case Op_AbsVB:
3053 case Op_AbsVS:
3054 case Op_AbsVI:
3055 case Op_AddReductionVI:
3056 case Op_AndReductionV:
3057 case Op_OrReductionV:
3058 case Op_XorReductionV:
3059 if (UseSSE < 3) { // requires at least SSSE3
3060 return false;
3061 }
3062 break;
3063 case Op_MaxHF:
3064 case Op_MinHF:
3065 if (!VM_Version::supports_avx512vlbw()) {
3066 return false;
3067 } // fallthrough
3068 case Op_AddHF:
3069 case Op_DivHF:
3070 case Op_FmaHF:
3071 case Op_MulHF:
3072 case Op_ReinterpretS2HF:
3073 case Op_ReinterpretHF2S:
3074 case Op_SubHF:
3075 case Op_SqrtHF:
3076 if (!VM_Version::supports_avx512_fp16()) {
3077 return false;
3078 }
3079 break;
3080 case Op_VectorLoadShuffle:
3081 case Op_VectorRearrange:
3082 case Op_MulReductionVI:
3083 if (UseSSE < 4) { // requires at least SSE4
3084 return false;
3085 }
3086 break;
3087 case Op_IsInfiniteF:
3088 case Op_IsInfiniteD:
3089 if (!VM_Version::supports_avx512dq()) {
3090 return false;
3091 }
3092 break;
3093 case Op_SqrtVD:
3094 case Op_SqrtVF:
3095 case Op_VectorMaskCmp:
3096 case Op_VectorCastB2X:
3097 case Op_VectorCastS2X:
3098 case Op_VectorCastI2X:
3099 case Op_VectorCastL2X:
3100 case Op_VectorCastF2X:
3101 case Op_VectorCastD2X:
3102 case Op_VectorUCastB2X:
3103 case Op_VectorUCastS2X:
3104 case Op_VectorUCastI2X:
3105 case Op_VectorMaskCast:
3106 if (UseAVX < 1) { // enabled for AVX only
3107 return false;
3108 }
3109 break;
3110 case Op_PopulateIndex:
3111 if (UseAVX < 2) {
3112 return false;
3113 }
3114 break;
3115 case Op_RoundVF:
3116 if (UseAVX < 2) { // enabled for AVX2 only
3117 return false;
3118 }
3119 break;
3120 case Op_RoundVD:
3121 if (UseAVX < 3) {
3122 return false; // enabled for AVX3 only
3123 }
3124 break;
3125 case Op_CompareAndSwapL:
3126 case Op_CompareAndSwapP:
3127 break;
3128 case Op_StrIndexOf:
3129 if (!UseSSE42Intrinsics) {
3130 return false;
3131 }
3132 break;
3133 case Op_StrIndexOfChar:
3134 if (!UseSSE42Intrinsics) {
3135 return false;
3136 }
3137 break;
3138 case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
3140 return false;
3141 }
3142 break;
3143 case Op_MulVB:
3144 case Op_LShiftVB:
3145 case Op_RShiftVB:
3146 case Op_URShiftVB:
3147 case Op_VectorInsert:
3148 case Op_VectorLoadMask:
3149 case Op_VectorStoreMask:
3150 case Op_VectorBlend:
3151 if (UseSSE < 4) {
3152 return false;
3153 }
3154 break;
3155 case Op_MaxD:
3156 case Op_MaxF:
3157 case Op_MinD:
3158 case Op_MinF:
3159 if (UseAVX < 1) { // enabled for AVX only
3160 return false;
3161 }
3162 break;
3163 case Op_CacheWB:
3164 case Op_CacheWBPreSync:
3165 case Op_CacheWBPostSync:
3166 if (!VM_Version::supports_data_cache_line_flush()) {
3167 return false;
3168 }
3169 break;
3170 case Op_ExtractB:
3171 case Op_ExtractL:
3172 case Op_ExtractI:
3173 case Op_RoundDoubleMode:
3174 if (UseSSE < 4) {
3175 return false;
3176 }
3177 break;
3178 case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
3180 return false; // 128bit vroundpd is not available
3181 }
3182 break;
3183 case Op_LoadVectorGather:
3184 case Op_LoadVectorGatherMasked:
3185 if (UseAVX < 2) {
3186 return false;
3187 }
3188 break;
3189 case Op_FmaF:
3190 case Op_FmaD:
3191 case Op_FmaVD:
3192 case Op_FmaVF:
3193 if (!UseFMA) {
3194 return false;
3195 }
3196 break;
3197 case Op_MacroLogicV:
3198 if (UseAVX < 3 || !UseVectorMacroLogic) {
3199 return false;
3200 }
3201 break;
3202
3203 case Op_VectorCmpMasked:
3204 case Op_VectorMaskGen:
3205 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3206 return false;
3207 }
3208 break;
3209 case Op_VectorMaskFirstTrue:
3210 case Op_VectorMaskLastTrue:
3211 case Op_VectorMaskTrueCount:
3212 case Op_VectorMaskToLong:
3213 if (UseAVX < 1) {
3214 return false;
3215 }
3216 break;
3217 case Op_RoundF:
3218 case Op_RoundD:
3219 break;
3220 case Op_CopySignD:
3221 case Op_CopySignF:
3222 if (UseAVX < 3) {
3223 return false;
3224 }
3225 if (!VM_Version::supports_avx512vl()) {
3226 return false;
3227 }
3228 break;
3229 case Op_CompressBits:
3230 case Op_ExpandBits:
3231 if (!VM_Version::supports_bmi2()) {
3232 return false;
3233 }
3234 break;
3235 case Op_CompressM:
3236 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3237 return false;
3238 }
3239 break;
3240 case Op_ConvF2HF:
3241 case Op_ConvHF2F:
3242 if (!VM_Version::supports_float16()) {
3243 return false;
3244 }
3245 break;
3246 case Op_VectorCastF2HF:
3247 case Op_VectorCastHF2F:
3248 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3249 return false;
3250 }
3251 break;
3252 }
3253 return true; // Match rules are supported by default.
3254 }
3255
3256 //------------------------------------------------------------------------
3257
3258 static inline bool is_pop_count_instr_target(BasicType bt) {
3259 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3260 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3261 }
3262
3263 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3264 return match_rule_supported_vector(opcode, vlen, bt);
3265 }
3266
3267 // Identify extra cases that we might want to provide match rules for vector nodes and
3268 // other intrinsics guarded with vector length (vlen) and element type (bt).
3269 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3270 if (!match_rule_supported(opcode)) {
3271 return false;
3272 }
3273 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3274 // * SSE2 supports 128bit vectors for all types;
3275 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3276 // * AVX2 supports 256bit vectors for all types;
3277 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3278 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3279 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3280 // And MaxVectorSize is taken into account as well.
3281 if (!vector_size_supported(bt, vlen)) {
3282 return false;
3283 }
3284 // Special cases which require vector length follow:
3285 // * implementation limitations
3286 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3287 // * 128bit vroundpd instruction is present only in AVX1
3288 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3289 switch (opcode) {
3290 case Op_MaxVHF:
3291 case Op_MinVHF:
3292 if (!VM_Version::supports_avx512bw()) {
3293 return false;
      } // fallthrough
3295 case Op_AddVHF:
3296 case Op_DivVHF:
3297 case Op_FmaVHF:
3298 case Op_MulVHF:
3299 case Op_SubVHF:
3300 case Op_SqrtVHF:
3301 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3302 return false;
3303 }
3304 if (!VM_Version::supports_avx512_fp16()) {
3305 return false;
3306 }
3307 break;
3308 case Op_AbsVF:
3309 case Op_NegVF:
      if (vlen == 16 && !VM_Version::supports_avx512dq()) {
3311 return false; // 512bit vandps and vxorps are not available
3312 }
3313 break;
3314 case Op_AbsVD:
3315 case Op_NegVD:
      if (vlen == 8 && !VM_Version::supports_avx512dq()) {
3317 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3318 }
3319 break;
3320 case Op_RotateRightV:
3321 case Op_RotateLeftV:
3322 if (bt != T_INT && bt != T_LONG) {
3323 return false;
3324 } // fallthrough
3325 case Op_MacroLogicV:
3326 if (!VM_Version::supports_evex() ||
3327 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3328 return false;
3329 }
3330 break;
3331 case Op_ClearArray:
3332 case Op_VectorMaskGen:
3333 case Op_VectorCmpMasked:
3334 if (!VM_Version::supports_avx512bw()) {
3335 return false;
3336 }
3337 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3338 return false;
3339 }
3340 break;
3341 case Op_LoadVectorMasked:
3342 case Op_StoreVectorMasked:
3343 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3344 return false;
3345 }
3346 break;
3347 case Op_UMinV:
3348 case Op_UMaxV:
3349 if (UseAVX == 0) {
3350 return false;
3351 }
3352 break;
3353 case Op_MaxV:
3354 case Op_MinV:
3355 if (UseSSE < 4 && is_integral_type(bt)) {
3356 return false;
3357 }
3358 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3359 // Float/Double intrinsics are enabled for AVX family currently.
3360 if (UseAVX == 0) {
3361 return false;
3362 }
3363 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3364 return false;
3365 }
3366 }
3367 break;
3368 case Op_CallLeafVector:
3369 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3370 return false;
3371 }
3372 break;
3373 case Op_AddReductionVI:
3374 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3375 return false;
3376 }
3377 // fallthrough
3378 case Op_AndReductionV:
3379 case Op_OrReductionV:
3380 case Op_XorReductionV:
3381 if (is_subword_type(bt) && (UseSSE < 4)) {
3382 return false;
3383 }
3384 break;
3385 case Op_MinReductionV:
3386 case Op_MaxReductionV:
3387 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3388 return false;
3389 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3390 return false;
3391 }
3392 // Float/Double intrinsics enabled for AVX family.
3393 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3394 return false;
3395 }
3396 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3397 return false;
3398 }
3399 break;
3400 case Op_VectorBlend:
3401 if (UseAVX == 0 && size_in_bits < 128) {
3402 return false;
3403 }
3404 break;
3405 case Op_VectorTest:
3406 if (UseSSE < 4) {
3407 return false; // Implementation limitation
3408 } else if (size_in_bits < 32) {
3409 return false; // Implementation limitation
3410 }
3411 break;
3412 case Op_VectorLoadShuffle:
3413 case Op_VectorRearrange:
      if (vlen == 2) {
3415 return false; // Implementation limitation due to how shuffle is loaded
3416 } else if (size_in_bits == 256 && UseAVX < 2) {
3417 return false; // Implementation limitation
3418 }
3419 break;
3420 case Op_VectorLoadMask:
3421 case Op_VectorMaskCast:
3422 if (size_in_bits == 256 && UseAVX < 2) {
3423 return false; // Implementation limitation
3424 }
3425 // fallthrough
3426 case Op_VectorStoreMask:
3427 if (vlen == 2) {
3428 return false; // Implementation limitation
3429 }
3430 break;
3431 case Op_PopulateIndex:
3432 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3433 return false;
3434 }
3435 break;
3436 case Op_VectorCastB2X:
3437 case Op_VectorCastS2X:
3438 case Op_VectorCastI2X:
3439 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3440 return false;
3441 }
3442 break;
3443 case Op_VectorCastL2X:
3444 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3445 return false;
3446 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3447 return false;
3448 }
3449 break;
3450 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types
      // happen after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3454 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3455 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3456 return false;
3457 }
3458 }
3459 // fallthrough
3460 case Op_VectorCastD2X:
3461 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3462 return false;
3463 }
3464 break;
3465 case Op_VectorCastF2HF:
3466 case Op_VectorCastHF2F:
3467 if (!VM_Version::supports_f16c() &&
3468 ((!VM_Version::supports_evex() ||
3469 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3470 return false;
3471 }
3472 break;
3473 case Op_RoundVD:
3474 if (!VM_Version::supports_avx512dq()) {
3475 return false;
3476 }
3477 break;
3478 case Op_MulReductionVI:
3479 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3480 return false;
3481 }
3482 break;
3483 case Op_LoadVectorGatherMasked:
3484 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3485 return false;
3486 }
3487 if (is_subword_type(bt) &&
3488 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3489 (size_in_bits < 64) ||
3490 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3491 return false;
3492 }
3493 break;
3494 case Op_StoreVectorScatterMasked:
3495 case Op_StoreVectorScatter:
3496 if (is_subword_type(bt)) {
3497 return false;
3498 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3499 return false;
3500 }
3501 // fallthrough
3502 case Op_LoadVectorGather:
3503 if (!is_subword_type(bt) && size_in_bits == 64) {
3504 return false;
3505 }
3506 if (is_subword_type(bt) && size_in_bits < 64) {
3507 return false;
3508 }
3509 break;
3510 case Op_SaturatingAddV:
3511 case Op_SaturatingSubV:
3512 if (UseAVX < 1) {
3513 return false; // Implementation limitation
3514 }
3515 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3516 return false;
3517 }
3518 break;
3519 case Op_SelectFromTwoVector:
3520 if (size_in_bits < 128) {
3521 return false;
3522 }
3523 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3524 return false;
3525 }
3526 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3527 return false;
3528 }
3529 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3530 return false;
3531 }
3532 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3533 return false;
3534 }
3535 break;
3536 case Op_MaskAll:
3537 if (!VM_Version::supports_evex()) {
3538 return false;
3539 }
3540 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3541 return false;
3542 }
3543 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3544 return false;
3545 }
3546 break;
3547 case Op_VectorMaskCmp:
3548 if (vlen < 2 || size_in_bits < 32) {
3549 return false;
3550 }
3551 break;
3552 case Op_CompressM:
3553 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3554 return false;
3555 }
3556 break;
3557 case Op_CompressV:
3558 case Op_ExpandV:
3559 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3560 return false;
3561 }
      if (size_in_bits < 128) {
        return false;
      } // fallthrough
3565 case Op_VectorLongToMask:
3566 if (UseAVX < 1) {
3567 return false;
3568 }
3569 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3570 return false;
3571 }
3572 break;
3573 case Op_SignumVD:
3574 case Op_SignumVF:
3575 if (UseAVX < 1) {
3576 return false;
3577 }
3578 break;
    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt) &&
          (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
3587 case Op_ReverseV:
3588 case Op_ReverseBytesV:
3589 if (UseAVX < 2) {
3590 return false;
3591 }
3592 break;
3593 case Op_CountTrailingZerosV:
3594 case Op_CountLeadingZerosV:
3595 if (UseAVX < 2) {
3596 return false;
3597 }
3598 break;
3599 }
3600 return true; // Per default match rules are supported.
3601 }
3602
3603 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most of the unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator. This routine does a strict check on
  // the existence of masked operation patterns by returning false for all
  // opcodes apart from the ones whose masked instruction patterns are defined
  // in this file.
3610 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3611 return false;
3612 }
3613
3614 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3615 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3616 return false;
3617 }
  switch (opcode) {
3619 // Unary masked operations
3620 case Op_AbsVB:
3621 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      } // fallthrough
3625 case Op_AbsVI:
3626 case Op_AbsVL:
3627 return true;
3628
3629 // Ternary masked operations
3630 case Op_FmaVF:
3631 case Op_FmaVD:
3632 return true;
3633
3634 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3636 return false;
3637 }
3638 return true;
3639
3640 // Binary masked operations
3641 case Op_AddVB:
3642 case Op_AddVS:
3643 case Op_SubVB:
3644 case Op_SubVS:
3645 case Op_MulVS:
3646 case Op_LShiftVS:
3647 case Op_RShiftVS:
3648 case Op_URShiftVS:
3649 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3650 if (!VM_Version::supports_avx512bw()) {
3651 return false; // Implementation limitation
3652 }
3653 return true;
3654
3655 case Op_MulVL:
3656 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3657 if (!VM_Version::supports_avx512dq()) {
3658 return false; // Implementation limitation
3659 }
3660 return true;
3661
3662 case Op_AndV:
3663 case Op_OrV:
3664 case Op_XorV:
3665 case Op_RotateRightV:
3666 case Op_RotateLeftV:
3667 if (bt != T_INT && bt != T_LONG) {
3668 return false; // Implementation limitation
3669 }
3670 return true;
3671
3672 case Op_VectorLoadMask:
3673 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3674 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3675 return false;
3676 }
3677 return true;
3678
3679 case Op_AddVI:
3680 case Op_AddVL:
3681 case Op_AddVF:
3682 case Op_AddVD:
3683 case Op_SubVI:
3684 case Op_SubVL:
3685 case Op_SubVF:
3686 case Op_SubVD:
3687 case Op_MulVI:
3688 case Op_MulVF:
3689 case Op_MulVD:
3690 case Op_DivVF:
3691 case Op_DivVD:
3692 case Op_SqrtVF:
3693 case Op_SqrtVD:
3694 case Op_LShiftVI:
3695 case Op_LShiftVL:
3696 case Op_RShiftVI:
3697 case Op_RShiftVL:
3698 case Op_URShiftVI:
3699 case Op_URShiftVL:
3700 case Op_LoadVectorMasked:
3701 case Op_StoreVectorMasked:
3702 case Op_LoadVectorGatherMasked:
3703 case Op_StoreVectorScatterMasked:
3704 return true;
3705
3706 case Op_UMinV:
3707 case Op_UMaxV:
3708 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3709 return false;
3710 } // fallthrough
3711 case Op_MaxV:
3712 case Op_MinV:
3713 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3714 return false; // Implementation limitation
3715 }
3716 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3717 return false; // Implementation limitation
3718 }
3719 return true;
3720 case Op_SaturatingAddV:
3721 case Op_SaturatingSubV:
3722 if (!is_subword_type(bt)) {
3723 return false;
3724 }
3725 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3726 return false; // Implementation limitation
3727 }
3728 return true;
3729
3730 case Op_VectorMaskCmp:
3731 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3732 return false; // Implementation limitation
3733 }
3734 return true;
3735
3736 case Op_VectorRearrange:
3737 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3738 return false; // Implementation limitation
3739 }
3740 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3741 return false; // Implementation limitation
3742 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3743 return false; // Implementation limitation
3744 }
3745 return true;
3746
3747 // Binary Logical operations
3748 case Op_AndVMask:
3749 case Op_OrVMask:
3750 case Op_XorVMask:
3751 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3752 return false; // Implementation limitation
3753 }
3754 return true;
3755
3756 case Op_PopCountVI:
3757 case Op_PopCountVL:
3758 if (!is_pop_count_instr_target(bt)) {
3759 return false;
3760 }
3761 return true;
3762
3763 case Op_MaskAll:
3764 return true;
3765
3766 case Op_CountLeadingZerosV:
3767 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3768 return true;
      } // fallthrough
3770 default:
3771 return false;
3772 }
3773 }
3774
3775 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3776 return false;
3777 }
3778
3779 // Return true if Vector::rearrange needs preparation of the shuffle argument
3780 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3781 switch (elem_bt) {
3782 case T_BYTE: return false;
3783 case T_SHORT: return !VM_Version::supports_avx512bw();
3784 case T_INT: return !VM_Version::supports_avx();
3785 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3786 default:
3787 ShouldNotReachHere();
3788 return false;
3789 }
3790 }
3791
3792 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3793 // Prefer predicate if the mask type is "TypeVectMask".
3794 return vt->isa_vectmask() != nullptr;
3795 }
3796
3797 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3798 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3799 bool legacy = (generic_opnd->opcode() == LEGVEC);
3800 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3801 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3802 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3803 return new legVecZOper();
3804 }
3805 if (legacy) {
3806 switch (ideal_reg) {
3807 case Op_VecS: return new legVecSOper();
3808 case Op_VecD: return new legVecDOper();
3809 case Op_VecX: return new legVecXOper();
3810 case Op_VecY: return new legVecYOper();
3811 case Op_VecZ: return new legVecZOper();
3812 }
3813 } else {
3814 switch (ideal_reg) {
3815 case Op_VecS: return new vecSOper();
3816 case Op_VecD: return new vecDOper();
3817 case Op_VecX: return new vecXOper();
3818 case Op_VecY: return new vecYOper();
3819 case Op_VecZ: return new vecZOper();
3820 }
3821 }
3822 ShouldNotReachHere();
3823 return nullptr;
3824 }
3825
3826 bool Matcher::is_reg2reg_move(MachNode* m) {
3827 switch (m->rule()) {
3828 case MoveVec2Leg_rule:
3829 case MoveLeg2Vec_rule:
3830 case MoveF2VL_rule:
3831 case MoveF2LEG_rule:
3832 case MoveVL2F_rule:
3833 case MoveLEG2F_rule:
3834 case MoveD2VL_rule:
3835 case MoveD2LEG_rule:
3836 case MoveVL2D_rule:
3837 case MoveLEG2D_rule:
3838 return true;
3839 default:
3840 return false;
3841 }
3842 }
3843
3844 bool Matcher::is_generic_vector(MachOper* opnd) {
3845 switch (opnd->opcode()) {
3846 case VEC:
3847 case LEGVEC:
3848 return true;
3849 default:
3850 return false;
3851 }
3852 }
3853
3854 //------------------------------------------------------------------------
3855
3856 const RegMask* Matcher::predicate_reg_mask(void) {
3857 return &_VECTMASK_REG_mask;
3858 }
3859
3860 // Max vector size in bytes. 0 if not supported.
3861 int Matcher::vector_width_in_bytes(BasicType bt) {
3862 assert(is_java_primitive(bt), "only primitive type vectors");
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX-512/EVEX supports 512bit vectors for all types.
3866 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
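  // e.g. UseAVX == 2 -> (1 << 2) * 8 = 32 bytes (256 bit),
  //      UseAVX == 3 -> (1 << 3) * 8 = 64 bytes (512 bit).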
3867 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3868 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3869 size = (UseAVX > 2) ? 64 : 32;
3870 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3871 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3872 // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
3874 // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
3899 return size;
3900 }
3901
3902 // Limits on vector size (number of elements) loaded into vector.
3903 int Matcher::max_vector_size(const BasicType bt) {
3904 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3905 }
3906 int Matcher::min_vector_size(const BasicType bt) {
3907 int max_size = max_vector_size(bt);
3908 // Min size which can be loaded into vector is 4 bytes.
3909 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
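  // e.g. min_vector_size(T_BYTE) == 4 (four 1-byte lanes) while
  // min_vector_size(T_INT) == 2, provided max_vector_size(bt) allows it.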
3910 // Support for calling svml double64 vectors
3911 if (bt == T_DOUBLE) {
3912 size = 1;
3913 }
  return MIN2(size, max_size);
3915 }
3916
3917 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3918 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3919 // by default on Cascade Lake
3920 if (VM_Version::is_default_intel_cascade_lake()) {
3921 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3922 }
3923 return Matcher::max_vector_size(bt);
3924 }
3925
3926 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3927 return -1;
3928 }
3929
3930 // Vector ideal reg corresponding to specified size in bytes
3931 uint Matcher::vector_ideal_reg(int size) {
3932 assert(MaxVectorSize >= size, "");
  switch (size) {
3934 case 4: return Op_VecS;
3935 case 8: return Op_VecD;
3936 case 16: return Op_VecX;
3937 case 32: return Op_VecY;
3938 case 64: return Op_VecZ;
3939 }
3940 ShouldNotReachHere();
3941 return 0;
3942 }
3943
3944 // Check for shift by small constant as well
3945 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3946 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3947 shift->in(2)->get_int() <= 3 &&
3948 // Are there other uses besides address expressions?
3949 !matcher->is_visited(shift)) {
3950 address_visited.set(shift->_idx); // Flag as address_visited
3951 mstack.push(shift->in(2), Matcher::Visit);
3952 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
3956 if (conv->Opcode() == Op_ConvI2L &&
3957 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3958 // Are there other uses besides address expressions?
3959 !matcher->is_visited(conv)) {
3960 address_visited.set(conv->_idx); // Flag as address_visited
3961 mstack.push(conv->in(1), Matcher::Pre_Visit);
3962 } else {
3963 mstack.push(conv, Matcher::Pre_Visit);
3964 }
3965 return true;
3966 }
3967 return false;
3968 }
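
// Example shape cloned above: the address "base + ((long)i << 3)" of a long[]
// element. The small-constant shift and a provably non-negative ConvI2L are
// both folded into the matched addressing expression instead of being
// materialized in registers.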
3969
3970 // This function identifies sub-graphs in which a 'load' node is
3971 // input to two different nodes, and such that it can be matched
3972 // with BMI instructions like blsi, blsr, etc.
3973 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3974 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3975 // refers to the same node.
3976 //
3977 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3978 // This is a temporary solution until we make DAGs expressible in ADL.
3979 template<typename ConType>
3980 class FusedPatternMatcher {
3981 Node* _op1_node;
3982 Node* _mop_node;
3983 int _con_op;
3984
3985 static int match_next(Node* n, int next_op, int next_op_idx) {
3986 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3987 return -1;
3988 }
3989
3990 if (next_op_idx == -1) { // n is commutative, try rotations
3991 if (n->in(1)->Opcode() == next_op) {
3992 return 1;
3993 } else if (n->in(2)->Opcode() == next_op) {
3994 return 2;
3995 }
3996 } else {
3997 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3998 if (n->in(next_op_idx)->Opcode() == next_op) {
3999 return next_op_idx;
4000 }
4001 }
4002 return -1;
4003 }
4004
4005 public:
4006 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4007 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4008
4009 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4010 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4011 typename ConType::NativeType con_value) {
4012 if (_op1_node->Opcode() != op1) {
4013 return false;
4014 }
4015 if (_mop_node->outcnt() > 2) {
4016 return false;
4017 }
4018 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4019 if (op1_op2_idx == -1) {
4020 return false;
4021 }
4022 // Memory operation must be the other edge
4023 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4024
4025 // Check that the mop node is really what we want
4026 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4027 Node* op2_node = _op1_node->in(op1_op2_idx);
4028 if (op2_node->outcnt() > 1) {
4029 return false;
4030 }
4031 assert(op2_node->Opcode() == op2, "Should be");
4032 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4033 if (op2_con_idx == -1) {
4034 return false;
4035 }
4036 // Memory operation must be the other edge
4037 int op2_mop_idx = (op2_con_idx & 1) + 1;
4038 // Check that the memory operation is the same node
4039 if (op2_node->in(op2_mop_idx) == _mop_node) {
4040 // Now check the constant
4041 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4042 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4043 return true;
4044 }
4045 }
4046 }
4047 return false;
4048 }
4049 };
4050
4051 static bool is_bmi_pattern(Node* n, Node* m) {
4052 assert(UseBMI1Instructions, "sanity");
4053 if (n != nullptr && m != nullptr) {
4054 if (m->Opcode() == Op_LoadI) {
4055 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4056 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4057 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4058 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4059 } else if (m->Opcode() == Op_LoadL) {
4060 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4061 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4062 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4063 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4064 }
4065 }
4066 return false;
4067 }
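
// Patterns accepted above, with x a LoadI/LoadL that is consumed twice:
//   blsi:   x & (0 - x)   <=> (AndI (SubI 0 x) x)
//   blsr:   x & (x + -1)  <=> (AndI (AddI x -1) x)
//   blsmsk: x ^ (x + -1)  <=> (XorI (AddI x -1) x)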
4068
4069 // Should the matcher clone input 'm' of node 'n'?
4070 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4071 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4072 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4073 mstack.push(m, Visit);
4074 return true;
4075 }
4076 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4077 mstack.push(m, Visit); // m = ShiftCntV
4078 return true;
4079 }
4080 if (is_encode_and_store_pattern(n, m)) {
4081 mstack.push(m, Visit);
4082 return true;
4083 }
4084 return false;
4085 }
4086
4087 // Should the Matcher clone shifts on addressing modes, expecting them
4088 // to be subsumed into complex addressing expressions or compute them
4089 // into registers?
4090 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4091 Node *off = m->in(AddPNode::Offset);
4092 if (off->is_Con()) {
4093 address_visited.test_set(m->_idx); // Flag as address_visited
4094 Node *adr = m->in(AddPNode::Address);
4095
4096 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4097 // AtomicAdd is not an addressing expression.
4098 // Cheap to find it by looking for screwy base.
4099 if (adr->is_AddP() &&
4100 !adr->in(AddPNode::Base)->is_top() &&
4101 !adr->in(AddPNode::Offset)->is_Con() &&
4102 off->get_long() == (int) (off->get_long()) && // immL32
4103 // Are there other uses besides address expressions?
4104 !is_visited(adr)) {
4105 address_visited.set(adr->_idx); // Flag as address_visited
4106 Node *shift = adr->in(AddPNode::Offset);
4107 if (!clone_shift(shift, this, mstack, address_visited)) {
4108 mstack.push(shift, Pre_Visit);
4109 }
4110 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4111 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4112 } else {
4113 mstack.push(adr, Pre_Visit);
4114 }
4115
4116 // Clone X+offset as it also folds into most addressing expressions
4117 mstack.push(off, Visit);
4118 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4119 return true;
4120 } else if (clone_shift(off, this, mstack, address_visited)) {
4121 address_visited.test_set(m->_idx); // Flag as address_visited
4122 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4123 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4124 return true;
4125 }
4126 return false;
4127 }
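
// Example: for "base + (idx << 2) + 16" the outer AddP carries the immL32
// constant 16 and the inner AddP the scaled index; both are cloned here so
// the whole expression folds into a single [base + idx*4 + 16] addressing
// mode.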
4128
4129 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4130 switch (bt) {
4131 case BoolTest::eq:
4132 return Assembler::eq;
4133 case BoolTest::ne:
4134 return Assembler::neq;
4135 case BoolTest::le:
4136 case BoolTest::ule:
4137 return Assembler::le;
4138 case BoolTest::ge:
4139 case BoolTest::uge:
4140 return Assembler::nlt;
4141 case BoolTest::lt:
4142 case BoolTest::ult:
4143 return Assembler::lt;
4144 case BoolTest::gt:
4145 case BoolTest::ugt:
4146 return Assembler::nle;
4147 default : ShouldNotReachHere(); return Assembler::_false;
4148 }
4149 }
4150
4151 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4152 switch (bt) {
4153 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4154 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4155 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4156 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4157 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4158 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4159 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4160 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4161 }
4162 }
4163
4164 // Helper methods for MachSpillCopyNode::implementation().
4165 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4166 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4167 assert(ireg == Op_VecS || // 32bit vector
4168 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4169 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4170 "no non-adjacent vector moves" );
4171 if (masm) {
4172 switch (ireg) {
4173 case Op_VecS: // copy whole register
4174 case Op_VecD:
4175 case Op_VecX:
4176 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4177 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4178 } else {
4179 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4180 }
4181 break;
4182 case Op_VecY:
4183 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4184 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4185 } else {
4186 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4187 }
4188 break;
4189 case Op_VecZ:
4190 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4191 break;
4192 default:
4193 ShouldNotReachHere();
4194 }
4195 #ifndef PRODUCT
4196 } else {
4197 switch (ireg) {
4198 case Op_VecS:
4199 case Op_VecD:
4200 case Op_VecX:
4201 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4202 break;
4203 case Op_VecY:
4204 case Op_VecZ:
4205 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4206 break;
4207 default:
4208 ShouldNotReachHere();
4209 }
4210 #endif
4211 }
4212 }
4213
4214 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4215 int stack_offset, int reg, uint ireg, outputStream* st) {
4216 if (masm) {
4217 if (is_load) {
4218 switch (ireg) {
4219 case Op_VecS:
4220 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4221 break;
4222 case Op_VecD:
4223 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4224 break;
4225 case Op_VecX:
4226 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4227 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4228 } else {
4229 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
4231 }
4232 break;
4233 case Op_VecY:
4234 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4235 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4236 } else {
4237 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
4239 }
4240 break;
4241 case Op_VecZ:
4242 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4243 break;
4244 default:
4245 ShouldNotReachHere();
4246 }
4247 } else { // store
4248 switch (ireg) {
4249 case Op_VecS:
4250 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4251 break;
4252 case Op_VecD:
4253 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4254 break;
4255 case Op_VecX:
4256 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4257 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4260 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4261 }
4262 break;
4263 case Op_VecY:
4264 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4265 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4268 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4269 }
4270 break;
4271 case Op_VecZ:
4272 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4273 break;
4274 default:
4275 ShouldNotReachHere();
4276 }
4277 }
4278 #ifndef PRODUCT
4279 } else {
4280 if (is_load) {
4281 switch (ireg) {
4282 case Op_VecS:
4283 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4284 break;
4285 case Op_VecD:
4286 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4287 break;
4288 case Op_VecX:
4289 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4290 break;
4291 case Op_VecY:
4292 case Op_VecZ:
4293 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4294 break;
4295 default:
4296 ShouldNotReachHere();
4297 }
4298 } else { // store
4299 switch (ireg) {
4300 case Op_VecS:
4301 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4302 break;
4303 case Op_VecD:
4304 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4305 break;
4306 case Op_VecX:
4307 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4308 break;
4309 case Op_VecY:
4310 case Op_VecZ:
4311 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4312 break;
4313 default:
4314 ShouldNotReachHere();
4315 }
4316 }
4317 #endif
4318 }
4319 }
4320
4321 template <class T>
4322 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4323 int size = type2aelembytes(bt) * len;
4324 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4325 for (int i = 0; i < len; i++) {
4326 int offset = i * type2aelembytes(bt);
4327 switch (bt) {
4328 case T_BYTE: val->at(i) = con; break;
4329 case T_SHORT: {
4330 jshort c = con;
4331 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4332 break;
4333 }
4334 case T_INT: {
4335 jint c = con;
4336 memcpy(val->adr_at(offset), &c, sizeof(jint));
4337 break;
4338 }
4339 case T_LONG: {
4340 jlong c = con;
4341 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4342 break;
4343 }
4344 case T_FLOAT: {
4345 jfloat c = con;
4346 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4347 break;
4348 }
4349 case T_DOUBLE: {
4350 jdouble c = con;
4351 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4352 break;
4353 }
4354 default: assert(false, "%s", type2name(bt));
4355 }
4356 }
4357 return val;
4358 }
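
// Example: vreplicate_imm<jint>(T_INT, 1, 4) yields a 16-byte array holding
// four little-endian copies of 0x00000001, i.e. the byte image of a 128-bit
// vector with every int lane set to 1.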
4359
4360 static inline jlong high_bit_set(BasicType bt) {
4361 switch (bt) {
4362 case T_BYTE: return 0x8080808080808080;
4363 case T_SHORT: return 0x8000800080008000;
4364 case T_INT: return 0x8000000080000000;
4365 case T_LONG: return 0x8000000000000000;
4366 default:
4367 ShouldNotReachHere();
4368 return 0;
4369 }
4370 }
4371
4372 #ifndef PRODUCT
4373 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4374 st->print("nop \t# %d bytes pad for loops and calls", _count);
4375 }
4376 #endif
4377
4378 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4379 __ nop(_count);
4380 }
4381
4382 uint MachNopNode::size(PhaseRegAlloc*) const {
4383 return _count;
4384 }
4385
4386 #ifndef PRODUCT
4387 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4388 st->print("# breakpoint");
4389 }
4390 #endif
4391
4392 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4393 __ int3();
4394 }
4395
4396 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4397 return MachNode::size(ra_);
4398 }
4399
4400 %}
4401
4402 //----------ENCODING BLOCK-----------------------------------------------------
4403 // This block specifies the encoding classes used by the compiler to
4404 // output byte streams. Encoding classes are parameterized macros
4405 // used by Machine Instruction Nodes in order to generate the bit
4406 // encoding of the instruction. Operands specify their base encoding
4407 // interface with the interface keyword. There are currently
4408 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
4409 // COND_INTER. REG_INTER causes an operand to generate a function
4410 // which returns its register number when queried. CONST_INTER causes
4411 // an operand to generate a function which returns the value of the
4412 // constant when queried. MEMORY_INTER causes an operand to generate
4413 // four functions which return the Base Register, the Index Register,
4414 // the Scale Value, and the Offset Value of the operand when queried.
// COND_INTER causes an operand to generate one function per basic
// boolean condition of a conditional instruction; each function returns
// the encoding code (i.e., the encoding bits for the instruction)
// associated with its condition.
4419 //
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, with their parameters, as specified
// in the encoding block), and they use the opcode keyword to specify,
// in order, their primary, secondary, and tertiary opcode. Only the
// opcode sections which a particular instruction needs for encoding
// need to be specified. A helper function is also available to check
// whether a constant displacement is an oop.
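//
// For illustration, a typical instruct ties these pieces together roughly
// as follows (a similar definition appears later in this file):
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
//     ins_pipe(ialu_reg_reg);
//   %}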
4428 encode %{
4429 enc_class cdql_enc(no_rax_rdx_RegI div)
4430 %{
4431 // Full implementation of Java idiv and irem; checks for
4432 // special case as described in JVM spec., p.243 & p.271.
4433 //
4434 // normal case special case
4435 //
4436 // input : rax: dividend min_int
4437 // reg: divisor -1
4438 //
4439 // output: rax: quotient (= rax idiv reg) min_int
4440 // rdx: remainder (= rax irem reg) 0
4441 //
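    // (In Java terms: Integer.MIN_VALUE / -1 must yield Integer.MIN_VALUE
    // with remainder 0; a raw idiv raises #DE for that operand pair, hence
    // the explicit check below.)
    //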
    // Code sequence:
4443 //
4444 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4445 // 5: 75 07/08 jne e <normal>
4446 // 7: 33 d2 xor %edx,%edx
4447 // [div >= 8 -> offset + 1]
4448 // [REX_B]
4449 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4450 // c: 74 03/04 je 11 <done>
4451 // 000000000000000e <normal>:
4452 // e: 99 cltd
4453 // [div >= 8 -> offset + 1]
4454 // [REX_B]
4455 // f: f7 f9 idiv $div
4456 // 0000000000000011 <done>:
4457 Label normal;
4458 Label done;
4459
4460 // cmp $0x80000000,%eax
4461 __ cmpl(as_Register(RAX_enc), 0x80000000);
4462
4463 // jne e <normal>
4464 __ jccb(Assembler::notEqual, normal);
4465
4466 // xor %edx,%edx
4467 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4468
    // cmp $0xffffffffffffffff,$div
4470 __ cmpl($div$$Register, -1);
4471
4472 // je 11 <done>
4473 __ jccb(Assembler::equal, done);
4474
4475 // <normal>
4476 // cltd
4477 __ bind(normal);
4478 __ cdql();
4479
4480 // idivl
4481 // <done>
4482 __ idivl($div$$Register);
4483 __ bind(done);
4484 %}
4485
4486 enc_class cdqq_enc(no_rax_rdx_RegL div)
4487 %{
4488 // Full implementation of Java ldiv and lrem; checks for
4489 // special case as described in JVM spec., p.243 & p.271.
4490 //
4491 // normal case special case
4492 //
4493 // input : rax: dividend min_long
4494 // reg: divisor -1
4495 //
4496 // output: rax: quotient (= rax idiv reg) min_long
4497 // rdx: remainder (= rax irem reg) 0
4498 //
    // Code sequence:
4500 //
4501 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4502 // 7: 00 00 80
4503 // a: 48 39 d0 cmp %rdx,%rax
4504 // d: 75 08 jne 17 <normal>
4505 // f: 33 d2 xor %edx,%edx
4506 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4507 // 15: 74 05 je 1c <done>
4508 // 0000000000000017 <normal>:
4509 // 17: 48 99 cqto
4510 // 19: 48 f7 f9 idiv $div
4511 // 000000000000001c <done>:
4512 Label normal;
4513 Label done;
4514
4515 // mov $0x8000000000000000,%rdx
4516 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4517
4518 // cmp %rdx,%rax
4519 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4520
4521 // jne 17 <normal>
4522 __ jccb(Assembler::notEqual, normal);
4523
4524 // xor %edx,%edx
4525 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4526
4527 // cmp $0xffffffffffffffff,$div
4528 __ cmpq($div$$Register, -1);
4529
    // je 1c <done>
4531 __ jccb(Assembler::equal, done);
4532
4533 // <normal>
4534 // cqto
4535 __ bind(normal);
4536 __ cdqq();
4537
    // idivq
4539 // <done>
4540 __ idivq($div$$Register);
4541 __ bind(done);
4542 %}
4543
4544 enc_class clear_avx %{
4545 DEBUG_ONLY(int off0 = __ offset());
4546 if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
4550 __ vzeroupper();
4551 }
4552 DEBUG_ONLY(int off1 = __ offset());
4553 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4554 %}
4555
4556 enc_class Java_To_Runtime(method meth) %{
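    // Materialize the target address in a scratch register and call through
    // it: a direct call only has a +/-2GB RIP-relative range, and the runtime
    // entry point is not guaranteed to lie within that range of the code
    // cache. r10 is caller-saved and not an argument register, so it is free
    // to clobber here.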
4557 __ lea(r10, RuntimeAddress((address)$meth$$method));
4558 __ call(r10);
4559 __ post_call_nop();
4560 %}
4561
4562 enc_class Java_Static_Call(method meth)
4563 %{
4564 // JAVA STATIC CALL
4565 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4566 // determine who we intended to call.
4567 if (!_method) {
4568 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4569 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4570 // The NOP here is purely to ensure that eliding a call to
4571 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4572 __ addr_nop_5();
4573 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4574 } else {
4575 int method_index = resolved_method_index(masm);
4576 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4577 : static_call_Relocation::spec(method_index);
4578 address mark = __ pc();
4579 int call_offset = __ offset();
4580 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4581 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4582 // Calls of the same statically bound method can share
4583 // a stub to the interpreter.
4584 __ code()->shared_stub_to_interp_for(_method, call_offset);
4585 } else {
4586 // Emit stubs for static call.
4587 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4588 __ clear_inst_mark();
4589 if (stub == nullptr) {
4590 ciEnv::current()->record_failure("CodeCache is full");
4591 return;
4592 }
4593 }
4594 }
4595 __ post_call_nop();
4596 %}
4597
4598 enc_class Java_Dynamic_Call(method meth) %{
4599 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4600 __ post_call_nop();
4601 %}
4602
4603 enc_class call_epilog %{
4604 if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
4606 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4607 Label L;
4608 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4609 __ jccb(Assembler::equal, L);
4610 // Die if stack mismatch
4611 __ int3();
4612 __ bind(L);
4613 }
4614 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but is used to pass the
      // null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code
      // that initializes it.
4617 uint con = (tf()->range_cc()->cnt() - 1);
4618 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4619 ProjNode* proj = fast_out(i)->as_Proj();
4620 if (proj->_con == con) {
4621 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4622 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4623 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4624 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4625 __ testq(rax, rax);
4626 __ setb(Assembler::notZero, toReg);
4627 __ movzbl(toReg, toReg);
4628 if (reg->is_stack()) {
4629 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4630 __ movq(Address(rsp, st_off), toReg);
4631 }
4632 break;
4633 }
4634 }
4635 if (return_value_is_used()) {
4636 // An inline type is returned as fields in multiple registers.
4637 // Rax either contains an oop if the inline type is buffered or a pointer
4638 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4639 // if the lowest bit is set to allow C2 to use the oop after null checking.
4640 // rax &= (rax & 1) - 1
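      // If the low (tag) bit is set:   (1 - 1) == 0,  and rax & 0  == 0 (rax cleared).
      // If the low (tag) bit is clear: (0 - 1) == -1, and rax & -1 == rax (rax kept).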
4641 __ movptr(rscratch1, rax);
4642 __ andptr(rscratch1, 0x1);
4643 __ subptr(rscratch1, 0x1);
4644 __ andptr(rax, rscratch1);
4645 }
4646 }
4647 %}
4648
4649 %}
4650
4651 //----------FRAME--------------------------------------------------------------
4652 // Definition of frame structure and management information.
4653 //
4654 // S T A C K L A Y O U T Allocators stack-slot number
4655 // | (to get allocators register number
4656 // G Owned by | | v add OptoReg::stack0())
4657 // r CALLER | |
4658 // o | +--------+ pad to even-align allocators stack-slot
4659 // w V | pad0 | numbers; owned by CALLER
4660 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4661 // h ^ | in | 5
4662 // | | args | 4 Holes in incoming args owned by SELF
4663 // | | | | 3
4664 // | | +--------+
4665 // V | | old out| Empty on Intel, window on Sparc
4666 // | old |preserve| Must be even aligned.
4667 // | SP-+--------+----> Matcher::_old_SP, even aligned
4668 // | | in | 3 area for Intel ret address
4669 // Owned by |preserve| Empty on Sparc.
4670 // SELF +--------+
4671 // | | pad2 | 2 pad to align old SP
4672 // | +--------+ 1
4673 // | | locks | 0
4674 // | +--------+----> OptoReg::stack0(), even aligned
4675 // | | pad1 | 11 pad to align new SP
4676 // | +--------+
4677 // | | | 10
4678 // | | spills | 9 spills
4679 // V | | 8 (pad0 slot for callee)
4680 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4681 // ^ | out | 7
4682 // | | args | 6 Holes in outgoing args owned by CALLEE
4683 // Owned by +--------+
4684 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4685 // | new |preserve| Must be even-aligned.
4686 // | SP-+--------+----> Matcher::_new_SP, even aligned
4687 // | | |
4688 //
4689 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4690 // known from SELF's arguments and the Java calling convention.
4691 // Region 6-7 is determined per call site.
4692 // Note 2: If the calling convention leaves holes in the incoming argument
4693 // area, those holes are owned by SELF. Holes in the outgoing area
4694 // are owned by the CALLEE. Holes should not be necessary in the
4695 // incoming area, as the Java calling convention is completely under
4696 // the control of the AD file. Doubles can be sorted and packed to
4697 // avoid holes. Holes in the outgoing arguments may be necessary for
4698 // varargs C calling conventions.
4699 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4700 // even aligned with pad0 as needed.
4701 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4702 // region 6-11 is even aligned; it may be padded out more so that
4703 // the region from SP to FP meets the minimum stack alignment.
4704 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4705 // alignment. Region 11, pad1, may be dynamically extended so that
4706 // SP meets the minimum alignment.
4707
4708 frame
4709 %{
4710 // These three registers define part of the calling convention
4711 // between compiled code and the interpreter.
4712 inline_cache_reg(RAX); // Inline Cache Register
4713
4714 // Optional: name the operand used by cisc-spilling to access
4715 // [stack_pointer + offset]
4716 cisc_spilling_operand_name(indOffset32);
4717
4718 // Number of stack slots consumed by locking an object
4719 sync_stack_slots(2);
4720
4721 // Compiled code's Frame Pointer
4722 frame_pointer(RSP);
4723
4724 // Interpreter stores its frame pointer in a register which is
4725 // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
4727 interpreter_frame_pointer(RBP);
4728
4729 // Stack alignment requirement
4730 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4731
4732 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4733 // for calls to C. Supports the var-args backing area for register parms.
4734 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4735
4736 // The after-PROLOG location of the return address. Location of
4737 // return address specifies a type (REG or STACK) and a number
4738 // representing the register number (i.e. - use a register name) or
4739 // stack slot.
4740 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4741 // Otherwise, it is above the locks and verification slot and alignment word
4742 return_addr(STACK - 2 +
4743 align_up((Compile::current()->in_preserve_stack_slots() +
4744 Compile::current()->fixed_slots()),
4745 stack_alignment_in_slots()));
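
  // For illustration with hypothetical values: if in_preserve_stack_slots()
  // is 4, fixed_slots() is 1 and the alignment is 4 slots (16 bytes), then
  // align_up(4 + 1, 4) == 8 and the return address lands at STACK slot
  // -2 + 8 == 6.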
4746
4747 // Location of compiled Java return values. Same as C for now.
4748 return_value
4749 %{
4750 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4751 "only return normal values");
4752
4753 static const int lo[Op_RegL + 1] = {
4754 0,
4755 0,
4756 RAX_num, // Op_RegN
4757 RAX_num, // Op_RegI
4758 RAX_num, // Op_RegP
4759 XMM0_num, // Op_RegF
4760 XMM0_num, // Op_RegD
4761 RAX_num // Op_RegL
4762 };
4763 static const int hi[Op_RegL + 1] = {
4764 0,
4765 0,
4766 OptoReg::Bad, // Op_RegN
4767 OptoReg::Bad, // Op_RegI
4768 RAX_H_num, // Op_RegP
4769 OptoReg::Bad, // Op_RegF
4770 XMM0b_num, // Op_RegD
4771 RAX_H_num // Op_RegL
4772 };
4773 // Excluded flags and vector registers.
4774 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4775 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4776 %}
4777 %}
4778
4779 //----------ATTRIBUTES---------------------------------------------------------
4780 //----------Operand Attributes-------------------------------------------------
4781 op_attrib op_cost(0); // Required cost attribute
4782
4783 //----------Instruction Attributes---------------------------------------------
4784 ins_attrib ins_cost(100); // Required cost attribute
4785 ins_attrib ins_size(8); // Required size attribute (in bits)
4786 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4787 // a non-matching short branch variant
4788 // of some long branch?
4789 ins_attrib ins_alignment(1); // Required alignment attribute (must
4790 // be a power of 2) specifies the
4791 // alignment that some part of the
4792 // instruction (not necessarily the
4793 // start) requires. If > 1, a
4794 // compute_padding() function must be
4795 // provided for the instruction
4796
4797 // Whether this node is expanded during code emission into a sequence of
4798 // instructions and the first instruction can perform an implicit null check.
4799 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4800
4801 //----------OPERANDS-----------------------------------------------------------
4802 // Operand definitions must precede instruction definitions for correct parsing
4803 // in the ADLC because operands constitute user defined types which are used in
4804 // instruction definitions.
4805
4806 //----------Simple Operands----------------------------------------------------
4807 // Immediate Operands
4808 // Integer Immediate
4809 operand immI()
4810 %{
4811 match(ConI);
4812
4813 op_cost(10);
4814 format %{ %}
4815 interface(CONST_INTER);
4816 %}
4817
4818 // Constant for test vs zero
4819 operand immI_0()
4820 %{
4821 predicate(n->get_int() == 0);
4822 match(ConI);
4823
4824 op_cost(0);
4825 format %{ %}
4826 interface(CONST_INTER);
4827 %}
4828
4829 // Constant for increment
4830 operand immI_1()
4831 %{
4832 predicate(n->get_int() == 1);
4833 match(ConI);
4834
4835 op_cost(0);
4836 format %{ %}
4837 interface(CONST_INTER);
4838 %}
4839
4840 // Constant for decrement
4841 operand immI_M1()
4842 %{
4843 predicate(n->get_int() == -1);
4844 match(ConI);
4845
4846 op_cost(0);
4847 format %{ %}
4848 interface(CONST_INTER);
4849 %}
4850
4851 operand immI_2()
4852 %{
4853 predicate(n->get_int() == 2);
4854 match(ConI);
4855
4856 op_cost(0);
4857 format %{ %}
4858 interface(CONST_INTER);
4859 %}
4860
4861 operand immI_4()
4862 %{
4863 predicate(n->get_int() == 4);
4864 match(ConI);
4865
4866 op_cost(0);
4867 format %{ %}
4868 interface(CONST_INTER);
4869 %}
4870
4871 operand immI_8()
4872 %{
4873 predicate(n->get_int() == 8);
4874 match(ConI);
4875
4876 op_cost(0);
4877 format %{ %}
4878 interface(CONST_INTER);
4879 %}
4880
4881 // Valid scale values for addressing modes
4882 operand immI2()
4883 %{
4884 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4885 match(ConI);
4886
4887 format %{ %}
4888 interface(CONST_INTER);
4889 %}
4890
4891 operand immU7()
4892 %{
4893 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4894 match(ConI);
4895
4896 op_cost(5);
4897 format %{ %}
4898 interface(CONST_INTER);
4899 %}
4900
4901 operand immI8()
4902 %{
4903 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4904 match(ConI);
4905
4906 op_cost(5);
4907 format %{ %}
4908 interface(CONST_INTER);
4909 %}
4910
4911 operand immU8()
4912 %{
4913 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4914 match(ConI);
4915
4916 op_cost(5);
4917 format %{ %}
4918 interface(CONST_INTER);
4919 %}
4920
4921 operand immI16()
4922 %{
4923 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4924 match(ConI);
4925
4926 op_cost(10);
4927 format %{ %}
4928 interface(CONST_INTER);
4929 %}
4930
4931 // Int Immediate non-negative
4932 operand immU31()
4933 %{
4934 predicate(n->get_int() >= 0);
4935 match(ConI);
4936
4937 op_cost(0);
4938 format %{ %}
4939 interface(CONST_INTER);
4940 %}
4941
4942 // Pointer Immediate
4943 operand immP()
4944 %{
4945 match(ConP);
4946
4947 op_cost(10);
4948 format %{ %}
4949 interface(CONST_INTER);
4950 %}
4951
4952 // Null Pointer Immediate
4953 operand immP0()
4954 %{
4955 predicate(n->get_ptr() == 0);
4956 match(ConP);
4957
4958 op_cost(5);
4959 format %{ %}
4960 interface(CONST_INTER);
4961 %}
4962
4963 // Pointer Immediate
4964 operand immN() %{
4965 match(ConN);
4966
4967 op_cost(10);
4968 format %{ %}
4969 interface(CONST_INTER);
4970 %}
4971
4972 operand immNKlass() %{
4973 match(ConNKlass);
4974
4975 op_cost(10);
4976 format %{ %}
4977 interface(CONST_INTER);
4978 %}
4979
4980 // Null Pointer Immediate
4981 operand immN0() %{
4982 predicate(n->get_narrowcon() == 0);
4983 match(ConN);
4984
4985 op_cost(5);
4986 format %{ %}
4987 interface(CONST_INTER);
4988 %}
4989
4990 operand immP31()
4991 %{
4992 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4993 && (n->get_ptr() >> 31) == 0);
4994 match(ConP);
4995
4996 op_cost(5);
4997 format %{ %}
4998 interface(CONST_INTER);
4999 %}
5000
5001
5002 // Long Immediate
5003 operand immL()
5004 %{
5005 match(ConL);
5006
5007 op_cost(20);
5008 format %{ %}
5009 interface(CONST_INTER);
5010 %}
5011
5012 // Long Immediate 8-bit
5013 operand immL8()
5014 %{
5015 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
5016 match(ConL);
5017
5018 op_cost(5);
5019 format %{ %}
5020 interface(CONST_INTER);
5021 %}
5022
5023 // Long Immediate 32-bit unsigned
5024 operand immUL32()
5025 %{
5026 predicate(n->get_long() == (unsigned int) (n->get_long()));
5027 match(ConL);
5028
5029 op_cost(10);
5030 format %{ %}
5031 interface(CONST_INTER);
5032 %}
5033
5034 // Long Immediate 32-bit signed
5035 operand immL32()
5036 %{
5037 predicate(n->get_long() == (int) (n->get_long()));
5038 match(ConL);
5039
5040 op_cost(15);
5041 format %{ %}
5042 interface(CONST_INTER);
5043 %}
5044
5045 operand immL_Pow2()
5046 %{
5047 predicate(is_power_of_2((julong)n->get_long()));
5048 match(ConL);
5049
5050 op_cost(15);
5051 format %{ %}
5052 interface(CONST_INTER);
5053 %}
5054
5055 operand immL_NotPow2()
5056 %{
5057 predicate(is_power_of_2((julong)~n->get_long()));
5058 match(ConL);
5059
5060 op_cost(15);
5061 format %{ %}
5062 interface(CONST_INTER);
5063 %}
5064
5065 // Long Immediate zero
5066 operand immL0()
5067 %{
5068 predicate(n->get_long() == 0L);
5069 match(ConL);
5070
5071 op_cost(10);
5072 format %{ %}
5073 interface(CONST_INTER);
5074 %}
5075
5076 // Constant for increment
5077 operand immL1()
5078 %{
5079 predicate(n->get_long() == 1);
5080 match(ConL);
5081
5082 format %{ %}
5083 interface(CONST_INTER);
5084 %}
5085
5086 // Constant for decrement
5087 operand immL_M1()
5088 %{
5089 predicate(n->get_long() == -1);
5090 match(ConL);
5091
5092 format %{ %}
5093 interface(CONST_INTER);
5094 %}
5095
5096 // Long Immediate: low 32-bit mask
5097 operand immL_32bits()
5098 %{
5099 predicate(n->get_long() == 0xFFFFFFFFL);
5100 match(ConL);
5101 op_cost(20);
5102
5103 format %{ %}
5104 interface(CONST_INTER);
5105 %}
5106
5107 // Int Immediate: 2^n-1, positive
5108 operand immI_Pow2M1()
5109 %{
5110 predicate((n->get_int() > 0)
5111 && is_power_of_2((juint)n->get_int() + 1));
5112 match(ConI);
5113
5114 op_cost(20);
5115 format %{ %}
5116 interface(CONST_INTER);
5117 %}
5118
5119 // Float Immediate zero
5120 operand immF0()
5121 %{
5122 predicate(jint_cast(n->getf()) == 0);
5123 match(ConF);
5124
5125 op_cost(5);
5126 format %{ %}
5127 interface(CONST_INTER);
5128 %}
5129
5130 // Float Immediate
5131 operand immF()
5132 %{
5133 match(ConF);
5134
5135 op_cost(15);
5136 format %{ %}
5137 interface(CONST_INTER);
5138 %}
5139
5140 // Half Float Immediate
5141 operand immH()
5142 %{
5143 match(ConH);
5144
5145 op_cost(15);
5146 format %{ %}
5147 interface(CONST_INTER);
5148 %}
5149
5150 // Double Immediate zero
5151 operand immD0()
5152 %{
5153 predicate(jlong_cast(n->getd()) == 0);
5154 match(ConD);
5155
5156 op_cost(5);
5157 format %{ %}
5158 interface(CONST_INTER);
5159 %}
5160
5161 // Double Immediate
5162 operand immD()
5163 %{
5164 match(ConD);
5165
5166 op_cost(15);
5167 format %{ %}
5168 interface(CONST_INTER);
5169 %}
5170
5171 // Immediates for special shifts (sign extend)
5172
5173 // Constants for increment
5174 operand immI_16()
5175 %{
5176 predicate(n->get_int() == 16);
5177 match(ConI);
5178
5179 format %{ %}
5180 interface(CONST_INTER);
5181 %}
5182
5183 operand immI_24()
5184 %{
5185 predicate(n->get_int() == 24);
5186 match(ConI);
5187
5188 format %{ %}
5189 interface(CONST_INTER);
5190 %}
5191
5192 // Constant for byte-wide masking
5193 operand immI_255()
5194 %{
5195 predicate(n->get_int() == 255);
5196 match(ConI);
5197
5198 format %{ %}
5199 interface(CONST_INTER);
5200 %}
5201
5202 // Constant for short-wide masking
5203 operand immI_65535()
5204 %{
5205 predicate(n->get_int() == 65535);
5206 match(ConI);
5207
5208 format %{ %}
5209 interface(CONST_INTER);
5210 %}
5211
5212 // Constant for byte-wide masking
5213 operand immL_255()
5214 %{
5215 predicate(n->get_long() == 255);
5216 match(ConL);
5217
5218 format %{ %}
5219 interface(CONST_INTER);
5220 %}
5221
5222 // Constant for short-wide masking
5223 operand immL_65535()
5224 %{
5225 predicate(n->get_long() == 65535);
5226 match(ConL);
5227
5228 format %{ %}
5229 interface(CONST_INTER);
5230 %}
5231
5232 operand kReg()
5233 %{
5234 constraint(ALLOC_IN_RC(vectmask_reg));
5235 match(RegVectMask);
5236 format %{%}
5237 interface(REG_INTER);
5238 %}
5239
5240 // Register Operands
5241 // Integer Register
5242 operand rRegI()
5243 %{
5244 constraint(ALLOC_IN_RC(int_reg));
5245 match(RegI);
5246
5247 match(rax_RegI);
5248 match(rbx_RegI);
5249 match(rcx_RegI);
5250 match(rdx_RegI);
5251 match(rdi_RegI);
5252
5253 format %{ %}
5254 interface(REG_INTER);
5255 %}
5256
5257 // Special Registers
5258 operand rax_RegI()
5259 %{
5260 constraint(ALLOC_IN_RC(int_rax_reg));
5261 match(RegI);
5262 match(rRegI);
5263
5264 format %{ "RAX" %}
5265 interface(REG_INTER);
5266 %}
5267
5268 // Special Registers
5269 operand rbx_RegI()
5270 %{
5271 constraint(ALLOC_IN_RC(int_rbx_reg));
5272 match(RegI);
5273 match(rRegI);
5274
5275 format %{ "RBX" %}
5276 interface(REG_INTER);
5277 %}
5278
5279 operand rcx_RegI()
5280 %{
5281 constraint(ALLOC_IN_RC(int_rcx_reg));
5282 match(RegI);
5283 match(rRegI);
5284
5285 format %{ "RCX" %}
5286 interface(REG_INTER);
5287 %}
5288
5289 operand rdx_RegI()
5290 %{
5291 constraint(ALLOC_IN_RC(int_rdx_reg));
5292 match(RegI);
5293 match(rRegI);
5294
5295 format %{ "RDX" %}
5296 interface(REG_INTER);
5297 %}
5298
5299 operand rdi_RegI()
5300 %{
5301 constraint(ALLOC_IN_RC(int_rdi_reg));
5302 match(RegI);
5303 match(rRegI);
5304
5305 format %{ "RDI" %}
5306 interface(REG_INTER);
5307 %}
5308
5309 operand no_rax_rdx_RegI()
5310 %{
5311 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5312 match(RegI);
5313 match(rbx_RegI);
5314 match(rcx_RegI);
5315 match(rdi_RegI);
5316
5317 format %{ %}
5318 interface(REG_INTER);
5319 %}
5320
5321 operand no_rbp_r13_RegI()
5322 %{
5323 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5324 match(RegI);
5325 match(rRegI);
5326 match(rax_RegI);
5327 match(rbx_RegI);
5328 match(rcx_RegI);
5329 match(rdx_RegI);
5330 match(rdi_RegI);
5331
5332 format %{ %}
5333 interface(REG_INTER);
5334 %}
5335
5336 // Pointer Register
5337 operand any_RegP()
5338 %{
5339 constraint(ALLOC_IN_RC(any_reg));
5340 match(RegP);
5341 match(rax_RegP);
5342 match(rbx_RegP);
5343 match(rdi_RegP);
5344 match(rsi_RegP);
5345 match(rbp_RegP);
5346 match(r15_RegP);
5347 match(rRegP);
5348
5349 format %{ %}
5350 interface(REG_INTER);
5351 %}
5352
5353 operand rRegP()
5354 %{
5355 constraint(ALLOC_IN_RC(ptr_reg));
5356 match(RegP);
5357 match(rax_RegP);
5358 match(rbx_RegP);
5359 match(rdi_RegP);
5360 match(rsi_RegP);
5361 match(rbp_RegP); // See Q&A below about
5362 match(r15_RegP); // r15_RegP and rbp_RegP.
5363
5364 format %{ %}
5365 interface(REG_INTER);
5366 %}
5367
5368 operand rRegN() %{
5369 constraint(ALLOC_IN_RC(int_reg));
5370 match(RegN);
5371
5372 format %{ %}
5373 interface(REG_INTER);
5374 %}
5375
5376 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5377 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be assigned
// to that output by the allocator.
// The same logic applies to rbp_RegP being a match for rRegP: if
// PreserveFramePointer==true, RBP is used as a proper frame pointer and is
// not included in ptr_reg. As a result, RBP is not included in the output
// of the instruction either.
5386
5387 // This operand is not allowed to use RBP even if
5388 // RBP is not used to hold the frame pointer.
5389 operand no_rbp_RegP()
5390 %{
5391 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5392 match(RegP);
5393 match(rbx_RegP);
5394 match(rsi_RegP);
5395 match(rdi_RegP);
5396
5397 format %{ %}
5398 interface(REG_INTER);
5399 %}
5400
5401 // Special Registers
5402 // Return a pointer value
5403 operand rax_RegP()
5404 %{
5405 constraint(ALLOC_IN_RC(ptr_rax_reg));
5406 match(RegP);
5407 match(rRegP);
5408
5409 format %{ %}
5410 interface(REG_INTER);
5411 %}
5412
5413 // Special Registers
5414 // Return a compressed pointer value
5415 operand rax_RegN()
5416 %{
5417 constraint(ALLOC_IN_RC(int_rax_reg));
5418 match(RegN);
5419 match(rRegN);
5420
5421 format %{ %}
5422 interface(REG_INTER);
5423 %}
5424
5425 // Used in AtomicAdd
5426 operand rbx_RegP()
5427 %{
5428 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5429 match(RegP);
5430 match(rRegP);
5431
5432 format %{ %}
5433 interface(REG_INTER);
5434 %}
5435
5436 operand rsi_RegP()
5437 %{
5438 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5439 match(RegP);
5440 match(rRegP);
5441
5442 format %{ %}
5443 interface(REG_INTER);
5444 %}
5445
5446 operand rbp_RegP()
5447 %{
5448 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5449 match(RegP);
5450 match(rRegP);
5451
5452 format %{ %}
5453 interface(REG_INTER);
5454 %}
5455
5456 // Used in rep stosq
5457 operand rdi_RegP()
5458 %{
5459 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5460 match(RegP);
5461 match(rRegP);
5462
5463 format %{ %}
5464 interface(REG_INTER);
5465 %}
5466
5467 operand r15_RegP()
5468 %{
5469 constraint(ALLOC_IN_RC(ptr_r15_reg));
5470 match(RegP);
5471 match(rRegP);
5472
5473 format %{ %}
5474 interface(REG_INTER);
5475 %}
5476
5477 operand rRegL()
5478 %{
5479 constraint(ALLOC_IN_RC(long_reg));
5480 match(RegL);
5481 match(rax_RegL);
5482 match(rdx_RegL);
5483
5484 format %{ %}
5485 interface(REG_INTER);
5486 %}
5487
5488 // Special Registers
5489 operand no_rax_rdx_RegL()
5490 %{
5491 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5492 match(RegL);
5493 match(rRegL);
5494
5495 format %{ %}
5496 interface(REG_INTER);
5497 %}
5498
5499 operand rax_RegL()
5500 %{
5501 constraint(ALLOC_IN_RC(long_rax_reg));
5502 match(RegL);
5503 match(rRegL);
5504
5505 format %{ "RAX" %}
5506 interface(REG_INTER);
5507 %}
5508
5509 operand rcx_RegL()
5510 %{
5511 constraint(ALLOC_IN_RC(long_rcx_reg));
5512 match(RegL);
5513 match(rRegL);
5514
5515 format %{ %}
5516 interface(REG_INTER);
5517 %}
5518
5519 operand rdx_RegL()
5520 %{
5521 constraint(ALLOC_IN_RC(long_rdx_reg));
5522 match(RegL);
5523 match(rRegL);
5524
5525 format %{ %}
5526 interface(REG_INTER);
5527 %}
5528
5529 operand r11_RegL()
5530 %{
5531 constraint(ALLOC_IN_RC(long_r11_reg));
5532 match(RegL);
5533 match(rRegL);
5534
5535 format %{ %}
5536 interface(REG_INTER);
5537 %}
5538
5539 operand no_rbp_r13_RegL()
5540 %{
5541 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5542 match(RegL);
5543 match(rRegL);
5544 match(rax_RegL);
5545 match(rcx_RegL);
5546 match(rdx_RegL);
5547
5548 format %{ %}
5549 interface(REG_INTER);
5550 %}
5551
5552 // Flags register, used as output of compare instructions
5553 operand rFlagsReg()
5554 %{
5555 constraint(ALLOC_IN_RC(int_flags));
5556 match(RegFlags);
5557
5558 format %{ "RFLAGS" %}
5559 interface(REG_INTER);
5560 %}
5561
5562 // Flags register, used as output of FLOATING POINT compare instructions
5563 operand rFlagsRegU()
5564 %{
5565 constraint(ALLOC_IN_RC(int_flags));
5566 match(RegFlags);
5567
5568 format %{ "RFLAGS_U" %}
5569 interface(REG_INTER);
5570 %}
5571
5572 operand rFlagsRegUCF() %{
5573 constraint(ALLOC_IN_RC(int_flags));
5574 match(RegFlags);
5575 predicate(false);
5576
5577 format %{ "RFLAGS_U_CF" %}
5578 interface(REG_INTER);
5579 %}
5580
5581 // Float register operands
5582 operand regF() %{
5583 constraint(ALLOC_IN_RC(float_reg));
5584 match(RegF);
5585
5586 format %{ %}
5587 interface(REG_INTER);
5588 %}
5589
5590 // Float register operands
5591 operand legRegF() %{
5592 constraint(ALLOC_IN_RC(float_reg_legacy));
5593 match(RegF);
5594
5595 format %{ %}
5596 interface(REG_INTER);
5597 %}
5598
5599 // Float register operands
5600 operand vlRegF() %{
5601 constraint(ALLOC_IN_RC(float_reg_vl));
5602 match(RegF);
5603
5604 format %{ %}
5605 interface(REG_INTER);
5606 %}
5607
5608 // Double register operands
5609 operand regD() %{
5610 constraint(ALLOC_IN_RC(double_reg));
5611 match(RegD);
5612
5613 format %{ %}
5614 interface(REG_INTER);
5615 %}
5616
5617 // Double register operands
5618 operand legRegD() %{
5619 constraint(ALLOC_IN_RC(double_reg_legacy));
5620 match(RegD);
5621
5622 format %{ %}
5623 interface(REG_INTER);
5624 %}
5625
5626 // Double register operands
5627 operand vlRegD() %{
5628 constraint(ALLOC_IN_RC(double_reg_vl));
5629 match(RegD);
5630
5631 format %{ %}
5632 interface(REG_INTER);
5633 %}
5634
5635 //----------Memory Operands----------------------------------------------------
5636 // Direct Memory Operand
5637 // operand direct(immP addr)
5638 // %{
5639 // match(addr);
5640
5641 // format %{ "[$addr]" %}
5642 // interface(MEMORY_INTER) %{
5643 // base(0xFFFFFFFF);
5644 // index(0x4);
5645 // scale(0x0);
5646 // disp($addr);
5647 // %}
5648 // %}
5649
5650 // Indirect Memory Operand
5651 operand indirect(any_RegP reg)
5652 %{
5653 constraint(ALLOC_IN_RC(ptr_reg));
5654 match(reg);
5655
5656 format %{ "[$reg]" %}
5657 interface(MEMORY_INTER) %{
5658 base($reg);
5659 index(0x4);
5660 scale(0x0);
5661 disp(0x0);
5662 %}
5663 %}
5664
5665 // Indirect Memory Plus Short Offset Operand
5666 operand indOffset8(any_RegP reg, immL8 off)
5667 %{
5668 constraint(ALLOC_IN_RC(ptr_reg));
5669 match(AddP reg off);
5670
5671 format %{ "[$reg + $off (8-bit)]" %}
5672 interface(MEMORY_INTER) %{
5673 base($reg);
5674 index(0x4);
5675 scale(0x0);
5676 disp($off);
5677 %}
5678 %}
5679
5680 // Indirect Memory Plus Long Offset Operand
5681 operand indOffset32(any_RegP reg, immL32 off)
5682 %{
5683 constraint(ALLOC_IN_RC(ptr_reg));
5684 match(AddP reg off);
5685
5686 format %{ "[$reg + $off (32-bit)]" %}
5687 interface(MEMORY_INTER) %{
5688 base($reg);
5689 index(0x4);
5690 scale(0x0);
5691 disp($off);
5692 %}
5693 %}
5694
5695 // Indirect Memory Plus Index Register Plus Offset Operand
5696 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5697 %{
5698 constraint(ALLOC_IN_RC(ptr_reg));
5699 match(AddP (AddP reg lreg) off);
5700
5701 op_cost(10);
5702 format %{"[$reg + $off + $lreg]" %}
5703 interface(MEMORY_INTER) %{
5704 base($reg);
5705 index($lreg);
5706 scale(0x0);
5707 disp($off);
5708 %}
5709 %}
5710
5711 // Indirect Memory Plus Index Register Plus Offset Operand
5712 operand indIndex(any_RegP reg, rRegL lreg)
5713 %{
5714 constraint(ALLOC_IN_RC(ptr_reg));
5715 match(AddP reg lreg);
5716
5717 op_cost(10);
5718 format %{"[$reg + $lreg]" %}
5719 interface(MEMORY_INTER) %{
5720 base($reg);
5721 index($lreg);
5722 scale(0x0);
5723 disp(0x0);
5724 %}
5725 %}
5726
5727 // Indirect Memory Times Scale Plus Index Register
5728 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5729 %{
5730 constraint(ALLOC_IN_RC(ptr_reg));
5731 match(AddP reg (LShiftL lreg scale));
5732
5733 op_cost(10);
5734 format %{"[$reg + $lreg << $scale]" %}
5735 interface(MEMORY_INTER) %{
5736 base($reg);
5737 index($lreg);
5738 scale($scale);
5739 disp(0x0);
5740 %}
5741 %}
5742
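// Why the "Pos" variants below require a non-negative index: on x86-64,
// 32-bit operations zero the upper half of their destination register, so
// an int value sits zero-extended in its 64-bit register. Using it directly
// as a 64-bit index agrees with the sign-extending ConvI2L only when the
// value is known to be >= 0, which is what the predicates check by walking
// the match tree down to the ConvI2L input's type.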
5743 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5744 %{
5745 constraint(ALLOC_IN_RC(ptr_reg));
5746 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5747 match(AddP reg (LShiftL (ConvI2L idx) scale));
5748
5749 op_cost(10);
5750 format %{"[$reg + pos $idx << $scale]" %}
5751 interface(MEMORY_INTER) %{
5752 base($reg);
5753 index($idx);
5754 scale($scale);
5755 disp(0x0);
5756 %}
5757 %}
5758
5759 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5760 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5761 %{
5762 constraint(ALLOC_IN_RC(ptr_reg));
5763 match(AddP (AddP reg (LShiftL lreg scale)) off);
5764
5765 op_cost(10);
5766 format %{"[$reg + $off + $lreg << $scale]" %}
5767 interface(MEMORY_INTER) %{
5768 base($reg);
5769 index($lreg);
5770 scale($scale);
5771 disp($off);
5772 %}
5773 %}
5774
5775 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5776 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5777 %{
5778 constraint(ALLOC_IN_RC(ptr_reg));
5779 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5780 match(AddP (AddP reg (ConvI2L idx)) off);
5781
5782 op_cost(10);
5783 format %{"[$reg + $off + $idx]" %}
5784 interface(MEMORY_INTER) %{
5785 base($reg);
5786 index($idx);
5787 scale(0x0);
5788 disp($off);
5789 %}
5790 %}
5791
5792 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5793 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5794 %{
5795 constraint(ALLOC_IN_RC(ptr_reg));
5796 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5797 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5798
5799 op_cost(10);
5800 format %{"[$reg + $off + $idx << $scale]" %}
5801 interface(MEMORY_INTER) %{
5802 base($reg);
5803 index($idx);
5804 scale($scale);
5805 disp($off);
5806 %}
5807 %}
5808
5809 // Indirect Narrow Oop Operand
5810 operand indCompressedOop(rRegN reg) %{
5811 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5812 constraint(ALLOC_IN_RC(ptr_reg));
5813 match(DecodeN reg);
5814
5815 op_cost(10);
5816 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5817 interface(MEMORY_INTER) %{
5818 base(0xc); // R12
5819 index($reg);
5820 scale(0x3);
5821 disp(0x0);
5822 %}
5823 %}
5824
5825 // Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
5828 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5829 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5830 constraint(ALLOC_IN_RC(ptr_reg));
5831 match(AddP (DecodeN reg) off);
5832
5833 op_cost(10);
5834 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5835 interface(MEMORY_INTER) %{
5836 base(0xc); // R12
5837 index($reg);
5838 scale(0x3);
5839 disp($off);
5840 %}
5841 %}
5842
5843 // Indirect Memory Operand
5844 operand indirectNarrow(rRegN reg)
5845 %{
5846 predicate(CompressedOops::shift() == 0);
5847 constraint(ALLOC_IN_RC(ptr_reg));
5848 match(DecodeN reg);
5849
5850 format %{ "[$reg]" %}
5851 interface(MEMORY_INTER) %{
5852 base($reg);
5853 index(0x4);
5854 scale(0x0);
5855 disp(0x0);
5856 %}
5857 %}
5858
5859 // Indirect Memory Plus Short Offset Operand
5860 operand indOffset8Narrow(rRegN reg, immL8 off)
5861 %{
5862 predicate(CompressedOops::shift() == 0);
5863 constraint(ALLOC_IN_RC(ptr_reg));
5864 match(AddP (DecodeN reg) off);
5865
5866 format %{ "[$reg + $off (8-bit)]" %}
5867 interface(MEMORY_INTER) %{
5868 base($reg);
5869 index(0x4);
5870 scale(0x0);
5871 disp($off);
5872 %}
5873 %}
5874
5875 // Indirect Memory Plus Long Offset Operand
5876 operand indOffset32Narrow(rRegN reg, immL32 off)
5877 %{
5878 predicate(CompressedOops::shift() == 0);
5879 constraint(ALLOC_IN_RC(ptr_reg));
5880 match(AddP (DecodeN reg) off);
5881
5882 format %{ "[$reg + $off (32-bit)]" %}
5883 interface(MEMORY_INTER) %{
5884 base($reg);
5885 index(0x4);
5886 scale(0x0);
5887 disp($off);
5888 %}
5889 %}
5890
5891 // Indirect Memory Plus Index Register Plus Offset Operand
5892 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5893 %{
5894 predicate(CompressedOops::shift() == 0);
5895 constraint(ALLOC_IN_RC(ptr_reg));
5896 match(AddP (AddP (DecodeN reg) lreg) off);
5897
5898 op_cost(10);
5899 format %{"[$reg + $off + $lreg]" %}
5900 interface(MEMORY_INTER) %{
5901 base($reg);
5902 index($lreg);
5903 scale(0x0);
5904 disp($off);
5905 %}
5906 %}
5907
5908 // Indirect Memory Plus Index Register Plus Offset Operand
5909 operand indIndexNarrow(rRegN reg, rRegL lreg)
5910 %{
5911 predicate(CompressedOops::shift() == 0);
5912 constraint(ALLOC_IN_RC(ptr_reg));
5913 match(AddP (DecodeN reg) lreg);
5914
5915 op_cost(10);
5916 format %{"[$reg + $lreg]" %}
5917 interface(MEMORY_INTER) %{
5918 base($reg);
5919 index($lreg);
5920 scale(0x0);
5921 disp(0x0);
5922 %}
5923 %}
5924
5925 // Indirect Memory Times Scale Plus Index Register
5926 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5927 %{
5928 predicate(CompressedOops::shift() == 0);
5929 constraint(ALLOC_IN_RC(ptr_reg));
5930 match(AddP (DecodeN reg) (LShiftL lreg scale));
5931
5932 op_cost(10);
5933 format %{"[$reg + $lreg << $scale]" %}
5934 interface(MEMORY_INTER) %{
5935 base($reg);
5936 index($lreg);
5937 scale($scale);
5938 disp(0x0);
5939 %}
5940 %}
5941
5942 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5943 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5944 %{
5945 predicate(CompressedOops::shift() == 0);
5946 constraint(ALLOC_IN_RC(ptr_reg));
5947 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5948
5949 op_cost(10);
5950 format %{"[$reg + $off + $lreg << $scale]" %}
5951 interface(MEMORY_INTER) %{
5952 base($reg);
5953 index($lreg);
5954 scale($scale);
5955 disp($off);
5956 %}
5957 %}
5958
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5960 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5961 %{
5962 constraint(ALLOC_IN_RC(ptr_reg));
5963 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5964 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5965
5966 op_cost(10);
5967 format %{"[$reg + $off + $idx]" %}
5968 interface(MEMORY_INTER) %{
5969 base($reg);
5970 index($idx);
5971 scale(0x0);
5972 disp($off);
5973 %}
5974 %}
5975
5976 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5977 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5978 %{
5979 constraint(ALLOC_IN_RC(ptr_reg));
5980 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5981 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5982
5983 op_cost(10);
5984 format %{"[$reg + $off + $idx << $scale]" %}
5985 interface(MEMORY_INTER) %{
5986 base($reg);
5987 index($idx);
5988 scale($scale);
5989 disp($off);
5990 %}
5991 %}
5992
5993 //----------Special Memory Operands--------------------------------------------
5994 // Stack Slot Operand - This operand is used for loading and storing temporary
5995 // values on the stack where a match requires a value to
5996 // flow through memory.
5997 operand stackSlotP(sRegP reg)
5998 %{
5999 constraint(ALLOC_IN_RC(stack_slots));
6000 // No match rule because this operand is only generated in matching
6001
6002 format %{ "[$reg]" %}
6003 interface(MEMORY_INTER) %{
6004 base(0x4); // RSP
6005 index(0x4); // No Index
6006 scale(0x0); // No Scale
6007 disp($reg); // Stack Offset
6008 %}
6009 %}
6010
6011 operand stackSlotI(sRegI reg)
6012 %{
6013 constraint(ALLOC_IN_RC(stack_slots));
6014 // No match rule because this operand is only generated in matching
6015
6016 format %{ "[$reg]" %}
6017 interface(MEMORY_INTER) %{
6018 base(0x4); // RSP
6019 index(0x4); // No Index
6020 scale(0x0); // No Scale
6021 disp($reg); // Stack Offset
6022 %}
6023 %}
6024
6025 operand stackSlotF(sRegF reg)
6026 %{
6027 constraint(ALLOC_IN_RC(stack_slots));
6028 // No match rule because this operand is only generated in matching
6029
6030 format %{ "[$reg]" %}
6031 interface(MEMORY_INTER) %{
6032 base(0x4); // RSP
6033 index(0x4); // No Index
6034 scale(0x0); // No Scale
6035 disp($reg); // Stack Offset
6036 %}
6037 %}
6038
6039 operand stackSlotD(sRegD reg)
6040 %{
6041 constraint(ALLOC_IN_RC(stack_slots));
6042 // No match rule because this operand is only generated in matching
6043
6044 format %{ "[$reg]" %}
6045 interface(MEMORY_INTER) %{
6046 base(0x4); // RSP
6047 index(0x4); // No Index
6048 scale(0x0); // No Scale
6049 disp($reg); // Stack Offset
6050 %}
6051 %}
6052 operand stackSlotL(sRegL reg)
6053 %{
6054 constraint(ALLOC_IN_RC(stack_slots));
6055 // No match rule because this operand is only generated in matching
6056
6057 format %{ "[$reg]" %}
6058 interface(MEMORY_INTER) %{
6059 base(0x4); // RSP
6060 index(0x4); // No Index
6061 scale(0x0); // No Scale
6062 disp($reg); // Stack Offset
6063 %}
6064 %}
6065
6066 //----------Conditional Branch Operands----------------------------------------
6067 // Comparison Op - This is the operation of the comparison, and is limited to
6068 // the following set of codes:
6069 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6070 //
6071 // Other attributes of the comparison, such as unsignedness, are specified
6072 // by the comparison instruction that sets a condition code flags register.
6073 // That result is represented by a flags operand whose subtype is appropriate
6074 // to the unsignedness (etc.) of the comparison.
6075 //
6076 // Later, the instruction which matches both the Comparison Op (a Bool) and
6077 // the flags (produced by the Cmp) specifies the coding of the comparison op
6078 // by matching a specific subtype of Bool operand below, such as cmpOpU.
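//
// For example, a Bool with _test == BoolTest::lt over a signed integer
// compare matches cmpOp below; its less() entry (0xC) supplies the
// condition-code bits used by encodings such as "jl".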
6079
6080 // Comparison Code
6081 operand cmpOp()
6082 %{
6083 match(Bool);
6084
6085 format %{ "" %}
6086 interface(COND_INTER) %{
6087 equal(0x4, "e");
6088 not_equal(0x5, "ne");
6089 less(0xC, "l");
6090 greater_equal(0xD, "ge");
6091 less_equal(0xE, "le");
6092 greater(0xF, "g");
6093 overflow(0x0, "o");
6094 no_overflow(0x1, "no");
6095 %}
6096 %}
6097
6098 // Comparison Code, unsigned compare. Used by FP also, with
6099 // C2 (unordered) turned into GT or LT already. The other bits
6100 // C0 and C3 are turned into Carry & Zero flags.
6101 operand cmpOpU()
6102 %{
6103 match(Bool);
6104
6105 format %{ "" %}
6106 interface(COND_INTER) %{
6107 equal(0x4, "e");
6108 not_equal(0x5, "ne");
6109 less(0x2, "b");
6110 greater_equal(0x3, "ae");
6111 less_equal(0x6, "be");
6112 greater(0x7, "a");
6113 overflow(0x0, "o");
6114 no_overflow(0x1, "no");
6115 %}
6116 %}
6117
6118
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6122 operand cmpOpUCF() %{
6123 match(Bool);
6124 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
6125 n->as_Bool()->_test._test == BoolTest::ge ||
6126 n->as_Bool()->_test._test == BoolTest::le ||
6127 n->as_Bool()->_test._test == BoolTest::gt ||
6128 n->in(1)->in(1) == n->in(1)->in(2));
6129 format %{ "" %}
6130 interface(COND_INTER) %{
6131 equal(0xb, "np");
6132 not_equal(0xa, "p");
6133 less(0x2, "b");
6134 greater_equal(0x3, "ae");
6135 less_equal(0x6, "be");
6136 greater(0x7, "a");
6137 overflow(0x0, "o");
6138 no_overflow(0x1, "no");
6139 %}
6140 %}
6141
6142
6143 // Floating comparisons that can be fixed up with extra conditional jumps
6144 operand cmpOpUCF2() %{
6145 match(Bool);
6146 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6147 n->as_Bool()->_test._test == BoolTest::eq) &&
6148 n->in(1)->in(1) != n->in(1)->in(2));
6149 format %{ "" %}
6150 interface(COND_INTER) %{
6151 equal(0x4, "e");
6152 not_equal(0x5, "ne");
6153 less(0x2, "b");
6154 greater_equal(0x3, "ae");
6155 less_equal(0x6, "be");
6156 greater(0x7, "a");
6157 overflow(0x0, "o");
6158 no_overflow(0x1, "no");
6159 %}
6160 %}
6161
// Operands for bound floating-point register arguments
6163 operand rxmm0() %{
6164 constraint(ALLOC_IN_RC(xmm0_reg));
6165 match(VecX);
6166 format%{%}
6167 interface(REG_INTER);
6168 %}
6169
6170 // Vectors
6171
6172 // Dummy generic vector class. Should be used for all vector operands.
6173 // Replaced with vec[SDXYZ] during post-selection pass.
6174 operand vec() %{
6175 constraint(ALLOC_IN_RC(dynamic));
6176 match(VecX);
6177 match(VecY);
6178 match(VecZ);
6179 match(VecS);
6180 match(VecD);
6181
6182 format %{ %}
6183 interface(REG_INTER);
6184 %}
6185
6186 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6187 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6188 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6189 // runtime code generation via reg_class_dynamic.
6190 operand legVec() %{
6191 constraint(ALLOC_IN_RC(dynamic));
6192 match(VecX);
6193 match(VecY);
6194 match(VecZ);
6195 match(VecS);
6196 match(VecD);
6197
6198 format %{ %}
6199 interface(REG_INTER);
6200 %}
6201
6202 // Replaces vec during post-selection cleanup. See above.
6203 operand vecS() %{
6204 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6205 match(VecS);
6206
6207 format %{ %}
6208 interface(REG_INTER);
6209 %}
6210
6211 // Replaces legVec during post-selection cleanup. See above.
6212 operand legVecS() %{
6213 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6214 match(VecS);
6215
6216 format %{ %}
6217 interface(REG_INTER);
6218 %}
6219
6220 // Replaces vec during post-selection cleanup. See above.
6221 operand vecD() %{
6222 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6223 match(VecD);
6224
6225 format %{ %}
6226 interface(REG_INTER);
6227 %}
6228
6229 // Replaces legVec during post-selection cleanup. See above.
6230 operand legVecD() %{
6231 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6232 match(VecD);
6233
6234 format %{ %}
6235 interface(REG_INTER);
6236 %}
6237
6238 // Replaces vec during post-selection cleanup. See above.
6239 operand vecX() %{
6240 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6241 match(VecX);
6242
6243 format %{ %}
6244 interface(REG_INTER);
6245 %}
6246
6247 // Replaces legVec during post-selection cleanup. See above.
6248 operand legVecX() %{
6249 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6250 match(VecX);
6251
6252 format %{ %}
6253 interface(REG_INTER);
6254 %}
6255
6256 // Replaces vec during post-selection cleanup. See above.
6257 operand vecY() %{
6258 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6259 match(VecY);
6260
6261 format %{ %}
6262 interface(REG_INTER);
6263 %}
6264
6265 // Replaces legVec during post-selection cleanup. See above.
6266 operand legVecY() %{
6267 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6268 match(VecY);
6269
6270 format %{ %}
6271 interface(REG_INTER);
6272 %}
6273
6274 // Replaces vec during post-selection cleanup. See above.
6275 operand vecZ() %{
6276 constraint(ALLOC_IN_RC(vectorz_reg));
6277 match(VecZ);
6278
6279 format %{ %}
6280 interface(REG_INTER);
6281 %}
6282
6283 // Replaces legVec during post-selection cleanup. See above.
6284 operand legVecZ() %{
6285 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6286 match(VecZ);
6287
6288 format %{ %}
6289 interface(REG_INTER);
6290 %}
6291
6292 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6294 // instruction definitions by not requiring the AD writer to specify separate
6295 // instructions for every form of operand when the instruction accepts
6296 // multiple operand types with the same basic encoding and format. The classic
6297 // case of this is memory operands.
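//
// For example, the memory opclass below lets a single load or store instruct
// declared with a "memory" operand match every listed addressing form,
// instead of requiring a separate instruct per operand (indirect, indOffset8,
// indOffset32, and so on).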
6298
6299 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6300 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6301 indCompressedOop, indCompressedOopOffset,
6302 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6303 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6304 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6305
6306 //----------PIPELINE-----------------------------------------------------------
6307 // Rules which define the behavior of the target architectures pipeline.
6308 pipeline %{
6309
6310 //----------ATTRIBUTES---------------------------------------------------------
6311 attributes %{
  variable_size_instructions;        // Variable-sized instructions
6313 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
6315 instruction_fetch_unit_size = 16; // The processor fetches one line
6316 instruction_fetch_units = 1; // of 16 bytes
6317 %}
6318
6319 //----------RESOURCES----------------------------------------------------------
6320 // Resources are the functional units available to the machine
6321
6322 // Generic P2/P3 pipeline
6323 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6324 // 3 instructions decoded per cycle.
6325 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6327 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6328 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6329 BR, FPU,
6330 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6331
6332 //----------PIPELINE DESCRIPTION-----------------------------------------------
6333 // Pipeline Description specifies the stages in the machine's pipeline
6334
6335 // Generic P2/P3 pipeline
6336 pipe_desc(S0, S1, S2, S3, S4, S5);
6337
6338 //----------PIPELINE CLASSES---------------------------------------------------
6339 // Pipeline Classes describe the stages in which input and output are
6340 // referenced by the hardware pipeline.
6341
6342 // Naming convention: ialu or fpu
6343 // Then: _reg
6344 // Then: _reg if there is a 2nd register
6345 // Then: _long if it's a pair of instructions implementing a long
6346 // Then: _fat if it requires the big decoder
6347 // Or: _mem if it requires the big decoder and a memory unit.
6348
6349 // Integer ALU reg operation
6350 pipe_class ialu_reg(rRegI dst)
6351 %{
6352 single_instruction;
6353 dst : S4(write);
6354 dst : S3(read);
6355 DECODE : S0; // any decoder
6356 ALU : S3; // any alu
6357 %}
6358
6359 // Long ALU reg operation
6360 pipe_class ialu_reg_long(rRegL dst)
6361 %{
6362 instruction_count(2);
6363 dst : S4(write);
6364 dst : S3(read);
6365 DECODE : S0(2); // any 2 decoders
6366 ALU : S3(2); // both alus
6367 %}
6368
6369 // Integer ALU reg operation using big decoder
6370 pipe_class ialu_reg_fat(rRegI dst)
6371 %{
6372 single_instruction;
6373 dst : S4(write);
6374 dst : S3(read);
6375 D0 : S0; // big decoder only
6376 ALU : S3; // any alu
6377 %}
6378
6379 // Integer ALU reg-reg operation
6380 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6381 %{
6382 single_instruction;
6383 dst : S4(write);
6384 src : S3(read);
6385 DECODE : S0; // any decoder
6386 ALU : S3; // any alu
6387 %}
6388
6389 // Integer ALU reg-reg operation
6390 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6391 %{
6392 single_instruction;
6393 dst : S4(write);
6394 src : S3(read);
6395 D0 : S0; // big decoder only
6396 ALU : S3; // any alu
6397 %}
6398
6399 // Integer ALU reg-mem operation
6400 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6401 %{
6402 single_instruction;
6403 dst : S5(write);
6404 mem : S3(read);
6405 D0 : S0; // big decoder only
6406 ALU : S4; // any alu
6407 MEM : S3; // any mem
6408 %}
6409
6410 // Integer mem operation (prefetch)
6411 pipe_class ialu_mem(memory mem)
6412 %{
6413 single_instruction;
6414 mem : S3(read);
6415 D0 : S0; // big decoder only
6416 MEM : S3; // any mem
6417 %}
6418
6419 // Integer Store to Memory
6420 pipe_class ialu_mem_reg(memory mem, rRegI src)
6421 %{
6422 single_instruction;
6423 mem : S3(read);
6424 src : S5(read);
6425 D0 : S0; // big decoder only
6426 ALU : S4; // any alu
6427 MEM : S3;
6428 %}
6429
6430 // // Long Store to Memory
6431 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6432 // %{
6433 // instruction_count(2);
6434 // mem : S3(read);
6435 // src : S5(read);
6436 // D0 : S0(2); // big decoder only; twice
6437 // ALU : S4(2); // any 2 alus
6438 // MEM : S3(2); // Both mems
6439 // %}
6440
6441 // Integer Store to Memory
6442 pipe_class ialu_mem_imm(memory mem)
6443 %{
6444 single_instruction;
6445 mem : S3(read);
6446 D0 : S0; // big decoder only
6447 ALU : S4; // any alu
6448 MEM : S3;
6449 %}
6450
6451 // Integer ALU0 reg-reg operation
6452 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6453 %{
6454 single_instruction;
6455 dst : S4(write);
6456 src : S3(read);
6457 D0 : S0; // Big decoder only
6458 ALU0 : S3; // only alu0
6459 %}
6460
6461 // Integer ALU0 reg-mem operation
6462 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6463 %{
6464 single_instruction;
6465 dst : S5(write);
6466 mem : S3(read);
6467 D0 : S0; // big decoder only
6468 ALU0 : S4; // ALU0 only
6469 MEM : S3; // any mem
6470 %}
6471
6472 // Integer ALU reg-reg operation
6473 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6474 %{
6475 single_instruction;
6476 cr : S4(write);
6477 src1 : S3(read);
6478 src2 : S3(read);
6479 DECODE : S0; // any decoder
6480 ALU : S3; // any alu
6481 %}
6482
6483 // Integer ALU reg-imm operation
6484 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6485 %{
6486 single_instruction;
6487 cr : S4(write);
6488 src1 : S3(read);
6489 DECODE : S0; // any decoder
6490 ALU : S3; // any alu
6491 %}
6492
6493 // Integer ALU reg-mem operation
6494 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6495 %{
6496 single_instruction;
6497 cr : S4(write);
6498 src1 : S3(read);
6499 src2 : S3(read);
6500 D0 : S0; // big decoder only
6501 ALU : S4; // any alu
6502 MEM : S3;
6503 %}
6504
6505 // Conditional move reg-reg
6506 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6507 %{
6508 instruction_count(4);
6509 y : S4(read);
6510 q : S3(read);
6511 p : S3(read);
6512 DECODE : S0(4); // any decoder
6513 %}
6514
6515 // Conditional move reg-reg
6516 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6517 %{
6518 single_instruction;
6519 dst : S4(write);
6520 src : S3(read);
6521 cr : S3(read);
6522 DECODE : S0; // any decoder
6523 %}
6524
6525 // Conditional move reg-mem
6526 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6527 %{
6528 single_instruction;
6529 dst : S4(write);
6530 src : S3(read);
6531 cr : S3(read);
6532 DECODE : S0; // any decoder
6533 MEM : S3;
6534 %}
6535
6536 // Conditional move reg-reg long
6537 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6538 %{
6539 single_instruction;
6540 dst : S4(write);
6541 src : S3(read);
6542 cr : S3(read);
6543 DECODE : S0(2); // any 2 decoders
6544 %}
6545
6546 // Float reg-reg operation
6547 pipe_class fpu_reg(regD dst)
6548 %{
6549 instruction_count(2);
6550 dst : S3(read);
6551 DECODE : S0(2); // any 2 decoders
6552 FPU : S3;
6553 %}
6554
6555 // Float reg-reg operation
6556 pipe_class fpu_reg_reg(regD dst, regD src)
6557 %{
6558 instruction_count(2);
6559 dst : S4(write);
6560 src : S3(read);
6561 DECODE : S0(2); // any 2 decoders
6562 FPU : S3;
6563 %}
6564
6565 // Float reg-reg operation
6566 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6567 %{
6568 instruction_count(3);
6569 dst : S4(write);
6570 src1 : S3(read);
6571 src2 : S3(read);
6572 DECODE : S0(3); // any 3 decoders
6573 FPU : S3(2);
6574 %}
6575
6576 // Float reg-reg operation
6577 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6578 %{
6579 instruction_count(4);
6580 dst : S4(write);
6581 src1 : S3(read);
6582 src2 : S3(read);
6583 src3 : S3(read);
  DECODE : S0(4); // any 4 decoders
6585 FPU : S3(2);
6586 %}
6587
// Float reg-mem-reg-reg operation
6589 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6590 %{
6591 instruction_count(4);
6592 dst : S4(write);
6593 src1 : S3(read);
6594 src2 : S3(read);
6595 src3 : S3(read);
6596 DECODE : S1(3); // any 3 decoders
6597 D0 : S0; // Big decoder only
6598 FPU : S3(2);
6599 MEM : S3;
6600 %}
6601
6602 // Float reg-mem operation
6603 pipe_class fpu_reg_mem(regD dst, memory mem)
6604 %{
6605 instruction_count(2);
6606 dst : S5(write);
6607 mem : S3(read);
6608 D0 : S0; // big decoder only
6609 DECODE : S1; // any decoder for FPU POP
6610 FPU : S4;
6611 MEM : S3; // any mem
6612 %}
6613
6614 // Float reg-mem operation
6615 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6616 %{
6617 instruction_count(3);
6618 dst : S5(write);
6619 src1 : S3(read);
6620 mem : S3(read);
6621 D0 : S0; // big decoder only
6622 DECODE : S1(2); // any decoder for FPU POP
6623 FPU : S4;
6624 MEM : S3; // any mem
6625 %}
6626
6627 // Float mem-reg operation
6628 pipe_class fpu_mem_reg(memory mem, regD src)
6629 %{
6630 instruction_count(2);
6631 src : S5(read);
6632 mem : S3(read);
6633 DECODE : S0; // any decoder for FPU PUSH
6634 D0 : S1; // big decoder only
6635 FPU : S4;
6636 MEM : S3; // any mem
6637 %}
6638
6639 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6640 %{
6641 instruction_count(3);
6642 src1 : S3(read);
6643 src2 : S3(read);
6644 mem : S3(read);
6645 DECODE : S0(2); // any decoder for FPU PUSH
6646 D0 : S1; // big decoder only
6647 FPU : S4;
6648 MEM : S3; // any mem
6649 %}
6650
6651 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6652 %{
6653 instruction_count(3);
6654 src1 : S3(read);
6655 src2 : S3(read);
6656 mem : S4(read);
6657 DECODE : S0; // any decoder for FPU PUSH
6658 D0 : S0(2); // big decoder only
6659 FPU : S4;
6660 MEM : S3(2); // any mem
6661 %}
6662
6663 pipe_class fpu_mem_mem(memory dst, memory src1)
6664 %{
6665 instruction_count(2);
6666 src1 : S3(read);
6667 dst : S4(read);
6668 D0 : S0(2); // big decoder only
6669 MEM : S3(2); // any mem
6670 %}
6671
6672 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6673 %{
6674 instruction_count(3);
6675 src1 : S3(read);
6676 src2 : S3(read);
6677 dst : S4(read);
6678 D0 : S0(3); // big decoder only
6679 FPU : S4;
6680 MEM : S3(3); // any mem
6681 %}
6682
6683 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6684 %{
6685 instruction_count(3);
6686 src1 : S4(read);
6687 mem : S4(read);
6688 DECODE : S0; // any decoder for FPU PUSH
6689 D0 : S0(2); // big decoder only
6690 FPU : S4;
6691 MEM : S3(2); // any mem
6692 %}
6693
6694 // Float load constant
6695 pipe_class fpu_reg_con(regD dst)
6696 %{
6697 instruction_count(2);
6698 dst : S5(write);
6699 D0 : S0; // big decoder only for the load
6700 DECODE : S1; // any decoder for FPU POP
6701 FPU : S4;
6702 MEM : S3; // any mem
6703 %}
6704
6705 // Float load constant
6706 pipe_class fpu_reg_reg_con(regD dst, regD src)
6707 %{
6708 instruction_count(3);
6709 dst : S5(write);
6710 src : S3(read);
6711 D0 : S0; // big decoder only for the load
6712 DECODE : S1(2); // any decoder for FPU POP
6713 FPU : S4;
6714 MEM : S3; // any mem
6715 %}
6716
// Unconditional branch
6718 pipe_class pipe_jmp(label labl)
6719 %{
6720 single_instruction;
6721 BR : S3;
6722 %}
6723
6724 // Conditional branch
6725 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6726 %{
6727 single_instruction;
6728 cr : S1(read);
6729 BR : S3;
6730 %}
6731
6732 // Allocation idiom
6733 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6734 %{
6735 instruction_count(1); force_serialization;
6736 fixed_latency(6);
6737 heap_ptr : S3(read);
6738 DECODE : S0(3);
6739 D0 : S2;
6740 MEM : S3;
6741 ALU : S3(2);
6742 dst : S5(write);
6743 BR : S5;
6744 %}
6745
6746 // Generic big/slow expanded idiom
6747 pipe_class pipe_slow()
6748 %{
6749 instruction_count(10); multiple_bundles; force_serialization;
6750 fixed_latency(100);
6751 D0 : S0(2);
6752 MEM : S3(2);
6753 %}
6754
6755 // The real do-nothing guy
6756 pipe_class empty()
6757 %{
6758 instruction_count(0);
6759 %}
6760
6761 // Define the class for the Nop node
6762 define
6763 %{
6764 MachNop = empty;
6765 %}
6766
6767 %}
6768
6769 //----------INSTRUCTIONS-------------------------------------------------------
6770 //
6771 // match -- States which machine-independent subtree may be replaced
6772 // by this instruction.
6773 // ins_cost -- The estimated cost of this instruction is used by instruction
6774 // selection to identify a minimum cost tree of machine
6775 // instructions that matches a tree of machine-independent
6776 // instructions.
6777 // format -- A string providing the disassembly for this instruction.
6778 // The value of an instruction's operand may be inserted
6779 // by referring to it with a '$' prefix.
6780 // opcode -- Three instruction opcodes may be provided. These are referred
6781 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6783 // indicate the type of machine instruction, while secondary
6784 // and tertiary are often used for prefix options or addressing
6785 // modes.
6786 // ins_encode -- A list of encode classes with parameters. The encode class
6787 // name must have been defined in an 'enc_class' specification
6788 // in the encode section of the architecture description.
6789
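// A minimal, purely illustrative sketch (commented out, not a live rule)
// of how these pieces combine; the rule name and cost below are made up,
// and the real integer-add rules are defined later in this file:
//
//   instruct addI_rReg_example(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));  // ideal subtree this rule replaces
//     effect(KILL cr);                // addl clobbers the condition codes
//     ins_cost(200);                  // relative cost used by selection
//     format %{ "addl    $dst, $src\t# int" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}
//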
6790 // ============================================================================
6791
6792 instruct ShouldNotReachHere() %{
6793 match(Halt);
6794 format %{ "stop\t# ShouldNotReachHere" %}
6795 ins_encode %{
6796 if (is_reachable()) {
6797 const char* str = __ code_string(_halt_reason);
6798 __ stop(str);
6799 }
6800 %}
6801 ins_pipe(pipe_slow);
6802 %}
6803
6804 // ============================================================================
6805
6806 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6807 // Load Float
6808 instruct MoveF2VL(vlRegF dst, regF src) %{
6809 match(Set dst src);
6810 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6811 ins_encode %{
6812 ShouldNotReachHere();
6813 %}
6814 ins_pipe( fpu_reg_reg );
6815 %}
6816
6817 // Load Float
6818 instruct MoveF2LEG(legRegF dst, regF src) %{
6819 match(Set dst src);
6820 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6821 ins_encode %{
6822 ShouldNotReachHere();
6823 %}
6824 ins_pipe( fpu_reg_reg );
6825 %}
6826
6827 // Load Float
6828 instruct MoveVL2F(regF dst, vlRegF src) %{
6829 match(Set dst src);
6830 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6831 ins_encode %{
6832 ShouldNotReachHere();
6833 %}
6834 ins_pipe( fpu_reg_reg );
6835 %}
6836
6837 // Load Float
6838 instruct MoveLEG2F(regF dst, legRegF src) %{
6839 match(Set dst src);
6840 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6841 ins_encode %{
6842 ShouldNotReachHere();
6843 %}
6844 ins_pipe( fpu_reg_reg );
6845 %}
6846
6847 // Load Double
6848 instruct MoveD2VL(vlRegD dst, regD src) %{
6849 match(Set dst src);
6850 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6851 ins_encode %{
6852 ShouldNotReachHere();
6853 %}
6854 ins_pipe( fpu_reg_reg );
6855 %}
6856
6857 // Load Double
6858 instruct MoveD2LEG(legRegD dst, regD src) %{
6859 match(Set dst src);
6860 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6861 ins_encode %{
6862 ShouldNotReachHere();
6863 %}
6864 ins_pipe( fpu_reg_reg );
6865 %}
6866
6867 // Load Double
6868 instruct MoveVL2D(regD dst, vlRegD src) %{
6869 match(Set dst src);
6870 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6871 ins_encode %{
6872 ShouldNotReachHere();
6873 %}
6874 ins_pipe( fpu_reg_reg );
6875 %}
6876
6877 // Load Double
6878 instruct MoveLEG2D(regD dst, legRegD src) %{
6879 match(Set dst src);
6880 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6881 ins_encode %{
6882 ShouldNotReachHere();
6883 %}
6884 ins_pipe( fpu_reg_reg );
6885 %}
6886
6887 //----------Load/Store/Move Instructions---------------------------------------
6888 //----------Load Instructions--------------------------------------------------
6889
6890 // Load Byte (8 bit signed)
6891 instruct loadB(rRegI dst, memory mem)
6892 %{
6893 match(Set dst (LoadB mem));
6894
6895 ins_cost(125);
6896 format %{ "movsbl $dst, $mem\t# byte" %}
6897
6898 ins_encode %{
6899 __ movsbl($dst$$Register, $mem$$Address);
6900 %}
6901
6902 ins_pipe(ialu_reg_mem);
6903 %}
6904
6905 // Load Byte (8 bit signed) into Long Register
6906 instruct loadB2L(rRegL dst, memory mem)
6907 %{
6908 match(Set dst (ConvI2L (LoadB mem)));
6909
6910 ins_cost(125);
6911 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6912
6913 ins_encode %{
6914 __ movsbq($dst$$Register, $mem$$Address);
6915 %}
6916
6917 ins_pipe(ialu_reg_mem);
6918 %}
6919
6920 // Load Unsigned Byte (8 bit UNsigned)
6921 instruct loadUB(rRegI dst, memory mem)
6922 %{
6923 match(Set dst (LoadUB mem));
6924
6925 ins_cost(125);
6926 format %{ "movzbl $dst, $mem\t# ubyte" %}
6927
6928 ins_encode %{
6929 __ movzbl($dst$$Register, $mem$$Address);
6930 %}
6931
6932 ins_pipe(ialu_reg_mem);
6933 %}
6934
6935 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6936 instruct loadUB2L(rRegL dst, memory mem)
6937 %{
6938 match(Set dst (ConvI2L (LoadUB mem)));
6939
6940 ins_cost(125);
6941 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6942
6943 ins_encode %{
6944 __ movzbq($dst$$Register, $mem$$Address);
6945 %}
6946
6947 ins_pipe(ialu_reg_mem);
6948 %}
6949
6950 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6951 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6952 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6953 effect(KILL cr);
6954
6955 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6956 "andl $dst, right_n_bits($mask, 8)" %}
6957 ins_encode %{
6958 Register Rdst = $dst$$Register;
6959 __ movzbq(Rdst, $mem$$Address);
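    // movzbq already zeroed bits 8..63, so only the low 8 bits of the
    // mask are significant; right_n_bits(8) clips the constant to them.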
6960 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6961 %}
6962 ins_pipe(ialu_reg_mem);
6963 %}
6964
6965 // Load Short (16 bit signed)
6966 instruct loadS(rRegI dst, memory mem)
6967 %{
6968 match(Set dst (LoadS mem));
6969
6970 ins_cost(125);
6971 format %{ "movswl $dst, $mem\t# short" %}
6972
6973 ins_encode %{
6974 __ movswl($dst$$Register, $mem$$Address);
6975 %}
6976
6977 ins_pipe(ialu_reg_mem);
6978 %}
6979
6980 // Load Short (16 bit signed) to Byte (8 bit signed)
6981 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6982 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6983
6984 ins_cost(125);
6985 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6986 ins_encode %{
6987 __ movsbl($dst$$Register, $mem$$Address);
6988 %}
6989 ins_pipe(ialu_reg_mem);
6990 %}
6991
6992 // Load Short (16 bit signed) into Long Register
6993 instruct loadS2L(rRegL dst, memory mem)
6994 %{
6995 match(Set dst (ConvI2L (LoadS mem)));
6996
6997 ins_cost(125);
6998 format %{ "movswq $dst, $mem\t# short -> long" %}
6999
7000 ins_encode %{
7001 __ movswq($dst$$Register, $mem$$Address);
7002 %}
7003
7004 ins_pipe(ialu_reg_mem);
7005 %}
7006
7007 // Load Unsigned Short/Char (16 bit UNsigned)
7008 instruct loadUS(rRegI dst, memory mem)
7009 %{
7010 match(Set dst (LoadUS mem));
7011
7012 ins_cost(125);
7013 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7014
7015 ins_encode %{
7016 __ movzwl($dst$$Register, $mem$$Address);
7017 %}
7018
7019 ins_pipe(ialu_reg_mem);
7020 %}
7021
7022 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7023 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7024 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7025
7026 ins_cost(125);
7027 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7028 ins_encode %{
7029 __ movsbl($dst$$Register, $mem$$Address);
7030 %}
7031 ins_pipe(ialu_reg_mem);
7032 %}
7033
7034 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7035 instruct loadUS2L(rRegL dst, memory mem)
7036 %{
7037 match(Set dst (ConvI2L (LoadUS mem)));
7038
7039 ins_cost(125);
7040 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7041
7042 ins_encode %{
7043 __ movzwq($dst$$Register, $mem$$Address);
7044 %}
7045
7046 ins_pipe(ialu_reg_mem);
7047 %}
7048
7049 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7050 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7051 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7052
7053 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7054 ins_encode %{
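    // Masking with 0xFF keeps only the low byte, so a zero-extending
    // byte load implements the whole matched pattern.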
7055 __ movzbq($dst$$Register, $mem$$Address);
7056 %}
7057 ins_pipe(ialu_reg_mem);
7058 %}
7059
7060 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7061 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7062 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7063 effect(KILL cr);
7064
7065 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7066 "andl $dst, right_n_bits($mask, 16)" %}
7067 ins_encode %{
7068 Register Rdst = $dst$$Register;
7069 __ movzwq(Rdst, $mem$$Address);
7070 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7071 %}
7072 ins_pipe(ialu_reg_mem);
7073 %}
7074
7075 // Load Integer
7076 instruct loadI(rRegI dst, memory mem)
7077 %{
7078 match(Set dst (LoadI mem));
7079
7080 ins_cost(125);
7081 format %{ "movl $dst, $mem\t# int" %}
7082
7083 ins_encode %{
7084 __ movl($dst$$Register, $mem$$Address);
7085 %}
7086
7087 ins_pipe(ialu_reg_mem);
7088 %}
7089
7090 // Load Integer (32 bit signed) to Byte (8 bit signed)
7091 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7092 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7093
7094 ins_cost(125);
7095 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7096 ins_encode %{
7097 __ movsbl($dst$$Register, $mem$$Address);
7098 %}
7099 ins_pipe(ialu_reg_mem);
7100 %}
7101
7102 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7103 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7104 match(Set dst (AndI (LoadI mem) mask));
7105
7106 ins_cost(125);
7107 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7108 ins_encode %{
7109 __ movzbl($dst$$Register, $mem$$Address);
7110 %}
7111 ins_pipe(ialu_reg_mem);
7112 %}
7113
7114 // Load Integer (32 bit signed) to Short (16 bit signed)
7115 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7116 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7117
7118 ins_cost(125);
7119 format %{ "movswl $dst, $mem\t# int -> short" %}
7120 ins_encode %{
7121 __ movswl($dst$$Register, $mem$$Address);
7122 %}
7123 ins_pipe(ialu_reg_mem);
7124 %}
7125
7126 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7127 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7128 match(Set dst (AndI (LoadI mem) mask));
7129
7130 ins_cost(125);
7131 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7132 ins_encode %{
7133 __ movzwl($dst$$Register, $mem$$Address);
7134 %}
7135 ins_pipe(ialu_reg_mem);
7136 %}
7137
7138 // Load Integer into Long Register
7139 instruct loadI2L(rRegL dst, memory mem)
7140 %{
7141 match(Set dst (ConvI2L (LoadI mem)));
7142
7143 ins_cost(125);
7144 format %{ "movslq $dst, $mem\t# int -> long" %}
7145
7146 ins_encode %{
7147 __ movslq($dst$$Register, $mem$$Address);
7148 %}
7149
7150 ins_pipe(ialu_reg_mem);
7151 %}
7152
7153 // Load Integer with mask 0xFF into Long Register
7154 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7155 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7156
7157 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7158 ins_encode %{
7159 __ movzbq($dst$$Register, $mem$$Address);
7160 %}
7161 ins_pipe(ialu_reg_mem);
7162 %}
7163
7164 // Load Integer with mask 0xFFFF into Long Register
7165 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7166 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7167
7168 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7169 ins_encode %{
7170 __ movzwq($dst$$Register, $mem$$Address);
7171 %}
7172 ins_pipe(ialu_reg_mem);
7173 %}
7174
7175 // Load Integer with a 31-bit mask into Long Register
7176 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7177 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7178 effect(KILL cr);
7179
7180 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7181 "andl $dst, $mask" %}
7182 ins_encode %{
7183 Register Rdst = $dst$$Register;
7184 __ movl(Rdst, $mem$$Address);
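    // The 31-bit mask keeps the result non-negative, so the implicit
    // zero-extension of these 32-bit ops matches the sign-extension
    // that ConvI2L requires.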
7185 __ andl(Rdst, $mask$$constant);
7186 %}
7187 ins_pipe(ialu_reg_mem);
7188 %}
7189
7190 // Load Unsigned Integer into Long Register
7191 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7192 %{
7193 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7194
7195 ins_cost(125);
7196 format %{ "movl $dst, $mem\t# uint -> long" %}
7197
7198 ins_encode %{
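    // A 32-bit move implicitly zero-extends into the upper half, which
    // is exactly the AndL with the 32-bit mask.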
7199 __ movl($dst$$Register, $mem$$Address);
7200 %}
7201
7202 ins_pipe(ialu_reg_mem);
7203 %}
7204
7205 // Load Long
7206 instruct loadL(rRegL dst, memory mem)
7207 %{
7208 match(Set dst (LoadL mem));
7209
7210 ins_cost(125);
7211 format %{ "movq $dst, $mem\t# long" %}
7212
7213 ins_encode %{
7214 __ movq($dst$$Register, $mem$$Address);
7215 %}
7216
7217 ins_pipe(ialu_reg_mem); // XXX
7218 %}
7219
7220 // Load Range
7221 instruct loadRange(rRegI dst, memory mem)
7222 %{
7223 match(Set dst (LoadRange mem));
7224
7225 ins_cost(125); // XXX
7226 format %{ "movl $dst, $mem\t# range" %}
7227 ins_encode %{
7228 __ movl($dst$$Register, $mem$$Address);
7229 %}
7230 ins_pipe(ialu_reg_mem);
7231 %}
7232
7233 // Load Pointer
7234 instruct loadP(rRegP dst, memory mem)
7235 %{
7236 match(Set dst (LoadP mem));
7237 predicate(n->as_Load()->barrier_data() == 0);
7238
7239 ins_cost(125); // XXX
7240 format %{ "movq $dst, $mem\t# ptr" %}
7241 ins_encode %{
7242 __ movq($dst$$Register, $mem$$Address);
7243 %}
7244 ins_pipe(ialu_reg_mem); // XXX
7245 %}
7246
7247 // Load Compressed Pointer
7248 instruct loadN(rRegN dst, memory mem)
7249 %{
7250 predicate(n->as_Load()->barrier_data() == 0);
7251 match(Set dst (LoadN mem));
7252
7253 ins_cost(125); // XXX
7254 format %{ "movl $dst, $mem\t# compressed ptr" %}
7255 ins_encode %{
7256 __ movl($dst$$Register, $mem$$Address);
7257 %}
7258 ins_pipe(ialu_reg_mem); // XXX
7259 %}
7260
7261
7262 // Load Klass Pointer
7263 instruct loadKlass(rRegP dst, memory mem)
7264 %{
7265 match(Set dst (LoadKlass mem));
7266
7267 ins_cost(125); // XXX
7268 format %{ "movq $dst, $mem\t# class" %}
7269 ins_encode %{
7270 __ movq($dst$$Register, $mem$$Address);
7271 %}
7272 ins_pipe(ialu_reg_mem); // XXX
7273 %}
7274
7275 // Load narrow Klass Pointer
7276 instruct loadNKlass(rRegN dst, memory mem)
7277 %{
7278 predicate(!UseCompactObjectHeaders);
7279 match(Set dst (LoadNKlass mem));
7280
7281 ins_cost(125); // XXX
7282 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7283 ins_encode %{
7284 __ movl($dst$$Register, $mem$$Address);
7285 %}
7286 ins_pipe(ialu_reg_mem); // XXX
7287 %}
7288
7289 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7290 %{
7291 predicate(UseCompactObjectHeaders);
7292 match(Set dst (LoadNKlass mem));
7293 effect(KILL cr);
7294 ins_cost(125);
7295 format %{
7296 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7297 "shrl $dst, markWord::klass_shift_at_offset"
7298 %}
7299 ins_encode %{
7300 if (UseAPX) {
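      // APX new-data-destination form: load and shift in one instruction.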
7301 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7302 }
7303 else {
7304 __ movl($dst$$Register, $mem$$Address);
7305 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7306 }
7307 %}
7308 ins_pipe(ialu_reg_mem);
7309 %}
7310
7311 // Load Float
7312 instruct loadF(regF dst, memory mem)
7313 %{
7314 match(Set dst (LoadF mem));
7315
7316 ins_cost(145); // XXX
7317 format %{ "movss $dst, $mem\t# float" %}
7318 ins_encode %{
7319 __ movflt($dst$$XMMRegister, $mem$$Address);
7320 %}
7321 ins_pipe(pipe_slow); // XXX
7322 %}
7323
7324 // Load Double
7325 instruct loadD_partial(regD dst, memory mem)
7326 %{
7327 predicate(!UseXmmLoadAndClearUpper);
7328 match(Set dst (LoadD mem));
7329
7330 ins_cost(145); // XXX
7331 format %{ "movlpd $dst, $mem\t# double" %}
7332 ins_encode %{
7333 __ movdbl($dst$$XMMRegister, $mem$$Address);
7334 %}
7335 ins_pipe(pipe_slow); // XXX
7336 %}
7337
7338 instruct loadD(regD dst, memory mem)
7339 %{
7340 predicate(UseXmmLoadAndClearUpper);
7341 match(Set dst (LoadD mem));
7342
7343 ins_cost(145); // XXX
7344 format %{ "movsd $dst, $mem\t# double" %}
7345 ins_encode %{
7346 __ movdbl($dst$$XMMRegister, $mem$$Address);
7347 %}
7348 ins_pipe(pipe_slow); // XXX
7349 %}
7350
7351 // max = java.lang.Math.max(float a, float b)
7352 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7353 predicate(VM_Version::supports_avx10_2());
7354 match(Set dst (MaxF a b));
7355 format %{ "maxF $dst, $a, $b" %}
7356 ins_encode %{
7357 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7358 %}
7359 ins_pipe( pipe_slow );
7360 %}
7361
7362 // max = java.lang.Math.max(float a, float b)
7363 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7364 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7365 match(Set dst (MaxF a b));
7366 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7367 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7368 ins_encode %{
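    // A plain (v)maxss does not implement Java Math.max semantics (NaN
    // propagation, -0.0 < +0.0), so the helper blends the result through
    // the temporaries instead.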
7369 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7370 %}
7371 ins_pipe( pipe_slow );
7372 %}
7373
7374 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7375 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7376 match(Set dst (MaxF a b));
7377 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7378
7379 format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7380 ins_encode %{
7381 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7382 false /*min*/, true /*single*/);
7383 %}
7384 ins_pipe( pipe_slow );
7385 %}
7386
7387 // max = java.lang.Math.max(double a, double b)
7388 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7389 predicate(VM_Version::supports_avx10_2());
7390 match(Set dst (MaxD a b));
7391 format %{ "maxD $dst, $a, $b" %}
7392 ins_encode %{
7393 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7394 %}
7395 ins_pipe( pipe_slow );
7396 %}
7397
7398 // max = java.lang.Math.max(double a, double b)
7399 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7400 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7401 match(Set dst (MaxD a b));
7402 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7403 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7404 ins_encode %{
7405 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7406 %}
7407 ins_pipe( pipe_slow );
7408 %}
7409
7410 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7411 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7412 match(Set dst (MaxD a b));
7413 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7414
7415 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7416 ins_encode %{
7417 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7418 false /*min*/, false /*single*/);
7419 %}
7420 ins_pipe( pipe_slow );
7421 %}
7422
// min = java.lang.Math.min(float a, float b)
7424 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7425 predicate(VM_Version::supports_avx10_2());
7426 match(Set dst (MinF a b));
7427 format %{ "minF $dst, $a, $b" %}
7428 ins_encode %{
7429 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7430 %}
7431 ins_pipe( pipe_slow );
7432 %}
7433
7434 // min = java.lang.Math.min(float a, float b)
7435 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7436 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7437 match(Set dst (MinF a b));
7438 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7439 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7440 ins_encode %{
7441 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7442 %}
7443 ins_pipe( pipe_slow );
7444 %}
7445
7446 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7447 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7448 match(Set dst (MinF a b));
7449 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7450
7451 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7452 ins_encode %{
7453 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7454 true /*min*/, true /*single*/);
7455 %}
7456 ins_pipe( pipe_slow );
7457 %}
7458
// min = java.lang.Math.min(double a, double b)
7460 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7461 predicate(VM_Version::supports_avx10_2());
7462 match(Set dst (MinD a b));
7463 format %{ "minD $dst, $a, $b" %}
7464 ins_encode %{
7465 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7466 %}
7467 ins_pipe( pipe_slow );
7468 %}
7469
7470 // min = java.lang.Math.min(double a, double b)
7471 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7472 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7473 match(Set dst (MinD a b));
7474 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7475 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7476 ins_encode %{
7477 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7478 %}
7479 ins_pipe( pipe_slow );
7480 %}
7481
7482 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7483 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7484 match(Set dst (MinD a b));
7485 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7486
7487 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7488 ins_encode %{
7489 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7490 true /*min*/, false /*single*/);
7491 %}
7492 ins_pipe( pipe_slow );
7493 %}
7494
7495 // Load Effective Address
7496 instruct leaP8(rRegP dst, indOffset8 mem)
7497 %{
7498 match(Set dst mem);
7499
7500 ins_cost(110); // XXX
7501 format %{ "leaq $dst, $mem\t# ptr 8" %}
7502 ins_encode %{
7503 __ leaq($dst$$Register, $mem$$Address);
7504 %}
7505 ins_pipe(ialu_reg_reg_fat);
7506 %}
7507
7508 instruct leaP32(rRegP dst, indOffset32 mem)
7509 %{
7510 match(Set dst mem);
7511
7512 ins_cost(110);
7513 format %{ "leaq $dst, $mem\t# ptr 32" %}
7514 ins_encode %{
7515 __ leaq($dst$$Register, $mem$$Address);
7516 %}
7517 ins_pipe(ialu_reg_reg_fat);
7518 %}
7519
7520 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7521 %{
7522 match(Set dst mem);
7523
7524 ins_cost(110);
7525 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7526 ins_encode %{
7527 __ leaq($dst$$Register, $mem$$Address);
7528 %}
7529 ins_pipe(ialu_reg_reg_fat);
7530 %}
7531
7532 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7533 %{
7534 match(Set dst mem);
7535
7536 ins_cost(110);
7537 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7538 ins_encode %{
7539 __ leaq($dst$$Register, $mem$$Address);
7540 %}
7541 ins_pipe(ialu_reg_reg_fat);
7542 %}
7543
7544 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7545 %{
7546 match(Set dst mem);
7547
7548 ins_cost(110);
7549 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7550 ins_encode %{
7551 __ leaq($dst$$Register, $mem$$Address);
7552 %}
7553 ins_pipe(ialu_reg_reg_fat);
7554 %}
7555
7556 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7557 %{
7558 match(Set dst mem);
7559
7560 ins_cost(110);
7561 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7562 ins_encode %{
7563 __ leaq($dst$$Register, $mem$$Address);
7564 %}
7565 ins_pipe(ialu_reg_reg_fat);
7566 %}
7567
7568 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7569 %{
7570 match(Set dst mem);
7571
7572 ins_cost(110);
7573 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7574 ins_encode %{
7575 __ leaq($dst$$Register, $mem$$Address);
7576 %}
7577 ins_pipe(ialu_reg_reg_fat);
7578 %}
7579
7580 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7581 %{
7582 match(Set dst mem);
7583
7584 ins_cost(110);
7585 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7586 ins_encode %{
7587 __ leaq($dst$$Register, $mem$$Address);
7588 %}
7589 ins_pipe(ialu_reg_reg_fat);
7590 %}
7591
7592 // Load Effective Address which uses Narrow (32-bits) oop
7593 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7594 %{
7595 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7596 match(Set dst mem);
7597
7598 ins_cost(110);
7599 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7600 ins_encode %{
7601 __ leaq($dst$$Register, $mem$$Address);
7602 %}
7603 ins_pipe(ialu_reg_reg_fat);
7604 %}
7605
7606 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7607 %{
7608 predicate(CompressedOops::shift() == 0);
7609 match(Set dst mem);
7610
7611 ins_cost(110); // XXX
7612 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7613 ins_encode %{
7614 __ leaq($dst$$Register, $mem$$Address);
7615 %}
7616 ins_pipe(ialu_reg_reg_fat);
7617 %}
7618
7619 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7620 %{
7621 predicate(CompressedOops::shift() == 0);
7622 match(Set dst mem);
7623
7624 ins_cost(110);
7625 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7626 ins_encode %{
7627 __ leaq($dst$$Register, $mem$$Address);
7628 %}
7629 ins_pipe(ialu_reg_reg_fat);
7630 %}
7631
7632 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7633 %{
7634 predicate(CompressedOops::shift() == 0);
7635 match(Set dst mem);
7636
7637 ins_cost(110);
7638 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7639 ins_encode %{
7640 __ leaq($dst$$Register, $mem$$Address);
7641 %}
7642 ins_pipe(ialu_reg_reg_fat);
7643 %}
7644
7645 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7646 %{
7647 predicate(CompressedOops::shift() == 0);
7648 match(Set dst mem);
7649
7650 ins_cost(110);
7651 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7652 ins_encode %{
7653 __ leaq($dst$$Register, $mem$$Address);
7654 %}
7655 ins_pipe(ialu_reg_reg_fat);
7656 %}
7657
7658 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7659 %{
7660 predicate(CompressedOops::shift() == 0);
7661 match(Set dst mem);
7662
7663 ins_cost(110);
7664 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7665 ins_encode %{
7666 __ leaq($dst$$Register, $mem$$Address);
7667 %}
7668 ins_pipe(ialu_reg_reg_fat);
7669 %}
7670
7671 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7672 %{
7673 predicate(CompressedOops::shift() == 0);
7674 match(Set dst mem);
7675
7676 ins_cost(110);
7677 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7678 ins_encode %{
7679 __ leaq($dst$$Register, $mem$$Address);
7680 %}
7681 ins_pipe(ialu_reg_reg_fat);
7682 %}
7683
7684 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7685 %{
7686 predicate(CompressedOops::shift() == 0);
7687 match(Set dst mem);
7688
7689 ins_cost(110);
7690 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7691 ins_encode %{
7692 __ leaq($dst$$Register, $mem$$Address);
7693 %}
7694 ins_pipe(ialu_reg_reg_fat);
7695 %}
7696
7697 instruct loadConI(rRegI dst, immI src)
7698 %{
7699 match(Set dst src);
7700
7701 format %{ "movl $dst, $src\t# int" %}
7702 ins_encode %{
7703 __ movl($dst$$Register, $src$$constant);
7704 %}
7705 ins_pipe(ialu_reg_fat); // XXX
7706 %}
7707
7708 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7709 %{
7710 match(Set dst src);
7711 effect(KILL cr);
7712
7713 ins_cost(50);
7714 format %{ "xorl $dst, $dst\t# int" %}
7715 ins_encode %{
7716 __ xorl($dst$$Register, $dst$$Register);
7717 %}
7718 ins_pipe(ialu_reg);
7719 %}
7720
7721 instruct loadConL(rRegL dst, immL src)
7722 %{
7723 match(Set dst src);
7724
7725 ins_cost(150);
7726 format %{ "movq $dst, $src\t# long" %}
7727 ins_encode %{
7728 __ mov64($dst$$Register, $src$$constant);
7729 %}
7730 ins_pipe(ialu_reg);
7731 %}
7732
7733 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7734 %{
7735 match(Set dst src);
7736 effect(KILL cr);
7737
7738 ins_cost(50);
7739 format %{ "xorl $dst, $dst\t# long" %}
7740 ins_encode %{
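    // A 32-bit xor zero-extends, clearing all 64 bits with a shorter
    // encoding than xorq.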
7741 __ xorl($dst$$Register, $dst$$Register);
7742 %}
7743 ins_pipe(ialu_reg); // XXX
7744 %}
7745
7746 instruct loadConUL32(rRegL dst, immUL32 src)
7747 %{
7748 match(Set dst src);
7749
7750 ins_cost(60);
7751 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7752 ins_encode %{
7753 __ movl($dst$$Register, $src$$constant);
7754 %}
7755 ins_pipe(ialu_reg);
7756 %}
7757
7758 instruct loadConL32(rRegL dst, immL32 src)
7759 %{
7760 match(Set dst src);
7761
7762 ins_cost(70);
7763 format %{ "movq $dst, $src\t# long (32-bit)" %}
7764 ins_encode %{
7765 __ movq($dst$$Register, $src$$constant);
7766 %}
7767 ins_pipe(ialu_reg);
7768 %}
7769
7770 instruct loadConP(rRegP dst, immP con) %{
7771 match(Set dst con);
7772
7773 format %{ "movq $dst, $con\t# ptr" %}
7774 ins_encode %{
7775 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7776 %}
7777 ins_pipe(ialu_reg_fat); // XXX
7778 %}
7779
7780 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7781 %{
7782 match(Set dst src);
7783 effect(KILL cr);
7784
7785 ins_cost(50);
7786 format %{ "xorl $dst, $dst\t# ptr" %}
7787 ins_encode %{
7788 __ xorl($dst$$Register, $dst$$Register);
7789 %}
7790 ins_pipe(ialu_reg);
7791 %}
7792
7793 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7794 %{
7795 match(Set dst src);
7796 effect(KILL cr);
7797
7798 ins_cost(60);
7799 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7800 ins_encode %{
7801 __ movl($dst$$Register, $src$$constant);
7802 %}
7803 ins_pipe(ialu_reg);
7804 %}
7805
7806 instruct loadConF(regF dst, immF con) %{
7807 match(Set dst con);
7808 ins_cost(125);
7809 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7810 ins_encode %{
7811 __ movflt($dst$$XMMRegister, $constantaddress($con));
7812 %}
7813 ins_pipe(pipe_slow);
7814 %}
7815
7816 instruct loadConH(regF dst, immH con) %{
7817 match(Set dst con);
7818 ins_cost(125);
7819 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7820 ins_encode %{
7821 __ movflt($dst$$XMMRegister, $constantaddress($con));
7822 %}
7823 ins_pipe(pipe_slow);
7824 %}
7825
7826 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7827 match(Set dst src);
7828 effect(KILL cr);
7829 format %{ "xorq $dst, $src\t# compressed null pointer" %}
7830 ins_encode %{
7831 __ xorq($dst$$Register, $dst$$Register);
7832 %}
7833 ins_pipe(ialu_reg);
7834 %}
7835
7836 instruct loadConN(rRegN dst, immN src) %{
7837 match(Set dst src);
7838
7839 ins_cost(125);
7840 format %{ "movl $dst, $src\t# compressed ptr" %}
7841 ins_encode %{
7842 address con = (address)$src$$constant;
7843 if (con == nullptr) {
7844 ShouldNotReachHere();
7845 } else {
7846 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7847 }
7848 %}
7849 ins_pipe(ialu_reg_fat); // XXX
7850 %}
7851
7852 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7853 match(Set dst src);
7854
7855 ins_cost(125);
7856 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7857 ins_encode %{
7858 address con = (address)$src$$constant;
7859 if (con == nullptr) {
7860 ShouldNotReachHere();
7861 } else {
7862 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7863 }
7864 %}
7865 ins_pipe(ialu_reg_fat); // XXX
7866 %}
7867
7868 instruct loadConF0(regF dst, immF0 src)
7869 %{
7870 match(Set dst src);
7871 ins_cost(100);
7872
7873 format %{ "xorps $dst, $dst\t# float 0.0" %}
7874 ins_encode %{
7875 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7876 %}
7877 ins_pipe(pipe_slow);
7878 %}
7879
// Use the same format since predicate() cannot be used here.
7881 instruct loadConD(regD dst, immD con) %{
7882 match(Set dst con);
7883 ins_cost(125);
7884 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7885 ins_encode %{
7886 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7887 %}
7888 ins_pipe(pipe_slow);
7889 %}
7890
7891 instruct loadConD0(regD dst, immD0 src)
7892 %{
7893 match(Set dst src);
7894 ins_cost(100);
7895
7896 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7897 ins_encode %{
7898 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7899 %}
7900 ins_pipe(pipe_slow);
7901 %}
7902
7903 instruct loadSSI(rRegI dst, stackSlotI src)
7904 %{
7905 match(Set dst src);
7906
7907 ins_cost(125);
7908 format %{ "movl $dst, $src\t# int stk" %}
7909 ins_encode %{
7910 __ movl($dst$$Register, $src$$Address);
7911 %}
7912 ins_pipe(ialu_reg_mem);
7913 %}
7914
7915 instruct loadSSL(rRegL dst, stackSlotL src)
7916 %{
7917 match(Set dst src);
7918
7919 ins_cost(125);
7920 format %{ "movq $dst, $src\t# long stk" %}
7921 ins_encode %{
7922 __ movq($dst$$Register, $src$$Address);
7923 %}
7924 ins_pipe(ialu_reg_mem);
7925 %}
7926
7927 instruct loadSSP(rRegP dst, stackSlotP src)
7928 %{
7929 match(Set dst src);
7930
7931 ins_cost(125);
7932 format %{ "movq $dst, $src\t# ptr stk" %}
7933 ins_encode %{
7934 __ movq($dst$$Register, $src$$Address);
7935 %}
7936 ins_pipe(ialu_reg_mem);
7937 %}
7938
7939 instruct loadSSF(regF dst, stackSlotF src)
7940 %{
7941 match(Set dst src);
7942
7943 ins_cost(125);
7944 format %{ "movss $dst, $src\t# float stk" %}
7945 ins_encode %{
7946 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7947 %}
7948 ins_pipe(pipe_slow); // XXX
7949 %}
7950
// Use the same format since predicate() cannot be used here.
7952 instruct loadSSD(regD dst, stackSlotD src)
7953 %{
7954 match(Set dst src);
7955
7956 ins_cost(125);
7957 format %{ "movsd $dst, $src\t# double stk" %}
7958 ins_encode %{
7959 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7960 %}
7961 ins_pipe(pipe_slow); // XXX
7962 %}
7963
7964 // Prefetch instructions for allocation.
7965 // Must be safe to execute with invalid address (cannot fault).
7966
7967 instruct prefetchAlloc( memory mem ) %{
7968 predicate(AllocatePrefetchInstr==3);
7969 match(PrefetchAllocation mem);
7970 ins_cost(125);
7971
7972 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7973 ins_encode %{
7974 __ prefetchw($mem$$Address);
7975 %}
7976 ins_pipe(ialu_mem);
7977 %}
7978
7979 instruct prefetchAllocNTA( memory mem ) %{
7980 predicate(AllocatePrefetchInstr==0);
7981 match(PrefetchAllocation mem);
7982 ins_cost(125);
7983
7984 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7985 ins_encode %{
7986 __ prefetchnta($mem$$Address);
7987 %}
7988 ins_pipe(ialu_mem);
7989 %}
7990
7991 instruct prefetchAllocT0( memory mem ) %{
7992 predicate(AllocatePrefetchInstr==1);
7993 match(PrefetchAllocation mem);
7994 ins_cost(125);
7995
7996 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7997 ins_encode %{
7998 __ prefetcht0($mem$$Address);
7999 %}
8000 ins_pipe(ialu_mem);
8001 %}
8002
8003 instruct prefetchAllocT2( memory mem ) %{
8004 predicate(AllocatePrefetchInstr==2);
8005 match(PrefetchAllocation mem);
8006 ins_cost(125);
8007
8008 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8009 ins_encode %{
8010 __ prefetcht2($mem$$Address);
8011 %}
8012 ins_pipe(ialu_mem);
8013 %}
8014
8015 //----------Store Instructions-------------------------------------------------
8016
8017 // Store Byte
8018 instruct storeB(memory mem, rRegI src)
8019 %{
8020 match(Set mem (StoreB mem src));
8021
8022 ins_cost(125); // XXX
8023 format %{ "movb $mem, $src\t# byte" %}
8024 ins_encode %{
8025 __ movb($mem$$Address, $src$$Register);
8026 %}
8027 ins_pipe(ialu_mem_reg);
8028 %}
8029
8030 // Store Char/Short
8031 instruct storeC(memory mem, rRegI src)
8032 %{
8033 match(Set mem (StoreC mem src));
8034
8035 ins_cost(125); // XXX
8036 format %{ "movw $mem, $src\t# char/short" %}
8037 ins_encode %{
8038 __ movw($mem$$Address, $src$$Register);
8039 %}
8040 ins_pipe(ialu_mem_reg);
8041 %}
8042
8043 // Store Integer
8044 instruct storeI(memory mem, rRegI src)
8045 %{
8046 match(Set mem (StoreI mem src));
8047
8048 ins_cost(125); // XXX
8049 format %{ "movl $mem, $src\t# int" %}
8050 ins_encode %{
8051 __ movl($mem$$Address, $src$$Register);
8052 %}
8053 ins_pipe(ialu_mem_reg);
8054 %}
8055
8056 // Store Long
8057 instruct storeL(memory mem, rRegL src)
8058 %{
8059 match(Set mem (StoreL mem src));
8060
8061 ins_cost(125); // XXX
8062 format %{ "movq $mem, $src\t# long" %}
8063 ins_encode %{
8064 __ movq($mem$$Address, $src$$Register);
8065 %}
8066 ins_pipe(ialu_mem_reg); // XXX
8067 %}
8068
8069 // Store Pointer
8070 instruct storeP(memory mem, any_RegP src)
8071 %{
8072 predicate(n->as_Store()->barrier_data() == 0);
8073 match(Set mem (StoreP mem src));
8074
8075 ins_cost(125); // XXX
8076 format %{ "movq $mem, $src\t# ptr" %}
8077 ins_encode %{
8078 __ movq($mem$$Address, $src$$Register);
8079 %}
8080 ins_pipe(ialu_mem_reg);
8081 %}
8082
8083 instruct storeImmP0(memory mem, immP0 zero)
8084 %{
8085 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8086 match(Set mem (StoreP mem zero));
8087
8088 ins_cost(125); // XXX
8089 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8090 ins_encode %{
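    // With a null heap base, R12 (the heapbase register) holds zero and
    // doubles as a zero register: a reg-to-mem store encodes shorter
    // than a store of an immediate zero.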
8091 __ movq($mem$$Address, r12);
8092 %}
8093 ins_pipe(ialu_mem_reg);
8094 %}
8095
8096 // Store Null Pointer, mark word, or other simple pointer constant.
8097 instruct storeImmP(memory mem, immP31 src)
8098 %{
8099 predicate(n->as_Store()->barrier_data() == 0);
8100 match(Set mem (StoreP mem src));
8101
8102 ins_cost(150); // XXX
8103 format %{ "movq $mem, $src\t# ptr" %}
8104 ins_encode %{
8105 __ movq($mem$$Address, $src$$constant);
8106 %}
8107 ins_pipe(ialu_mem_imm);
8108 %}
8109
8110 // Store Compressed Pointer
8111 instruct storeN(memory mem, rRegN src)
8112 %{
8113 predicate(n->as_Store()->barrier_data() == 0);
8114 match(Set mem (StoreN mem src));
8115
8116 ins_cost(125); // XXX
8117 format %{ "movl $mem, $src\t# compressed ptr" %}
8118 ins_encode %{
8119 __ movl($mem$$Address, $src$$Register);
8120 %}
8121 ins_pipe(ialu_mem_reg);
8122 %}
8123
8124 instruct storeNKlass(memory mem, rRegN src)
8125 %{
8126 match(Set mem (StoreNKlass mem src));
8127
8128 ins_cost(125); // XXX
8129 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8130 ins_encode %{
8131 __ movl($mem$$Address, $src$$Register);
8132 %}
8133 ins_pipe(ialu_mem_reg);
8134 %}
8135
8136 instruct storeImmN0(memory mem, immN0 zero)
8137 %{
8138 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8139 match(Set mem (StoreN mem zero));
8140
8141 ins_cost(125); // XXX
8142 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8143 ins_encode %{
8144 __ movl($mem$$Address, r12);
8145 %}
8146 ins_pipe(ialu_mem_reg);
8147 %}
8148
8149 instruct storeImmN(memory mem, immN src)
8150 %{
8151 predicate(n->as_Store()->barrier_data() == 0);
8152 match(Set mem (StoreN mem src));
8153
8154 ins_cost(150); // XXX
8155 format %{ "movl $mem, $src\t# compressed ptr" %}
8156 ins_encode %{
8157 address con = (address)$src$$constant;
8158 if (con == nullptr) {
8159 __ movl($mem$$Address, 0);
8160 } else {
8161 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8162 }
8163 %}
8164 ins_pipe(ialu_mem_imm);
8165 %}
8166
8167 instruct storeImmNKlass(memory mem, immNKlass src)
8168 %{
8169 match(Set mem (StoreNKlass mem src));
8170
8171 ins_cost(150); // XXX
8172 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8173 ins_encode %{
8174 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8175 %}
8176 ins_pipe(ialu_mem_imm);
8177 %}
8178
8179 // Store Integer Immediate
8180 instruct storeImmI0(memory mem, immI_0 zero)
8181 %{
8182 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8183 match(Set mem (StoreI mem zero));
8184
8185 ins_cost(125); // XXX
8186 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8187 ins_encode %{
8188 __ movl($mem$$Address, r12);
8189 %}
8190 ins_pipe(ialu_mem_reg);
8191 %}
8192
8193 instruct storeImmI(memory mem, immI src)
8194 %{
8195 match(Set mem (StoreI mem src));
8196
8197 ins_cost(150);
8198 format %{ "movl $mem, $src\t# int" %}
8199 ins_encode %{
8200 __ movl($mem$$Address, $src$$constant);
8201 %}
8202 ins_pipe(ialu_mem_imm);
8203 %}
8204
8205 // Store Long Immediate
8206 instruct storeImmL0(memory mem, immL0 zero)
8207 %{
8208 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8209 match(Set mem (StoreL mem zero));
8210
8211 ins_cost(125); // XXX
8212 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8213 ins_encode %{
8214 __ movq($mem$$Address, r12);
8215 %}
8216 ins_pipe(ialu_mem_reg);
8217 %}
8218
8219 instruct storeImmL(memory mem, immL32 src)
8220 %{
8221 match(Set mem (StoreL mem src));
8222
8223 ins_cost(150);
8224 format %{ "movq $mem, $src\t# long" %}
8225 ins_encode %{
8226 __ movq($mem$$Address, $src$$constant);
8227 %}
8228 ins_pipe(ialu_mem_imm);
8229 %}
8230
8231 // Store Short/Char Immediate
8232 instruct storeImmC0(memory mem, immI_0 zero)
8233 %{
8234 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8235 match(Set mem (StoreC mem zero));
8236
8237 ins_cost(125); // XXX
8238 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8239 ins_encode %{
8240 __ movw($mem$$Address, r12);
8241 %}
8242 ins_pipe(ialu_mem_reg);
8243 %}
8244
8245 instruct storeImmI16(memory mem, immI16 src)
8246 %{
8247 predicate(UseStoreImmI16);
8248 match(Set mem (StoreC mem src));
8249
8250 ins_cost(150);
8251 format %{ "movw $mem, $src\t# short/char" %}
8252 ins_encode %{
8253 __ movw($mem$$Address, $src$$constant);
8254 %}
8255 ins_pipe(ialu_mem_imm);
8256 %}
8257
8258 // Store Byte Immediate
8259 instruct storeImmB0(memory mem, immI_0 zero)
8260 %{
8261 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8262 match(Set mem (StoreB mem zero));
8263
8264 ins_cost(125); // XXX
8265 format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %}
8266 ins_encode %{
8267 __ movb($mem$$Address, r12);
8268 %}
8269 ins_pipe(ialu_mem_reg);
8270 %}
8271
8272 instruct storeImmB(memory mem, immI8 src)
8273 %{
8274 match(Set mem (StoreB mem src));
8275
8276 ins_cost(150); // XXX
8277 format %{ "movb $mem, $src\t# byte" %}
8278 ins_encode %{
8279 __ movb($mem$$Address, $src$$constant);
8280 %}
8281 ins_pipe(ialu_mem_imm);
8282 %}
8283
8284 // Store Float
8285 instruct storeF(memory mem, regF src)
8286 %{
8287 match(Set mem (StoreF mem src));
8288
8289 ins_cost(95); // XXX
8290 format %{ "movss $mem, $src\t# float" %}
8291 ins_encode %{
8292 __ movflt($mem$$Address, $src$$XMMRegister);
8293 %}
8294 ins_pipe(pipe_slow); // XXX
8295 %}
8296
8297 // Store immediate Float value (it is faster than store from XMM register)
8298 instruct storeF0(memory mem, immF0 zero)
8299 %{
8300 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8301 match(Set mem (StoreF mem zero));
8302
8303 ins_cost(25); // XXX
8304 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8305 ins_encode %{
8306 __ movl($mem$$Address, r12);
8307 %}
8308 ins_pipe(ialu_mem_reg);
8309 %}
8310
8311 instruct storeF_imm(memory mem, immF src)
8312 %{
8313 match(Set mem (StoreF mem src));
8314
8315 ins_cost(50);
8316 format %{ "movl $mem, $src\t# float" %}
8317 ins_encode %{
8318 __ movl($mem$$Address, jint_cast($src$$constant));
8319 %}
8320 ins_pipe(ialu_mem_imm);
8321 %}
8322
8323 // Store Double
8324 instruct storeD(memory mem, regD src)
8325 %{
8326 match(Set mem (StoreD mem src));
8327
8328 ins_cost(95); // XXX
8329 format %{ "movsd $mem, $src\t# double" %}
8330 ins_encode %{
8331 __ movdbl($mem$$Address, $src$$XMMRegister);
8332 %}
8333 ins_pipe(pipe_slow); // XXX
8334 %}
8335
8336 // Store immediate double 0.0 (it is faster than store from XMM register)
8337 instruct storeD0_imm(memory mem, immD0 src)
8338 %{
8339 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8340 match(Set mem (StoreD mem src));
8341
8342 ins_cost(50);
8343 format %{ "movq $mem, $src\t# double 0." %}
8344 ins_encode %{
8345 __ movq($mem$$Address, $src$$constant);
8346 %}
8347 ins_pipe(ialu_mem_imm);
8348 %}
8349
8350 instruct storeD0(memory mem, immD0 zero)
8351 %{
8352 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8353 match(Set mem (StoreD mem zero));
8354
8355 ins_cost(25); // XXX
8356 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8357 ins_encode %{
8358 __ movq($mem$$Address, r12);
8359 %}
8360 ins_pipe(ialu_mem_reg);
8361 %}
8362
8363 instruct storeSSI(stackSlotI dst, rRegI src)
8364 %{
8365 match(Set dst src);
8366
8367 ins_cost(100);
8368 format %{ "movl $dst, $src\t# int stk" %}
8369 ins_encode %{
8370 __ movl($dst$$Address, $src$$Register);
8371 %}
8372 ins_pipe( ialu_mem_reg );
8373 %}
8374
8375 instruct storeSSL(stackSlotL dst, rRegL src)
8376 %{
8377 match(Set dst src);
8378
8379 ins_cost(100);
8380 format %{ "movq $dst, $src\t# long stk" %}
8381 ins_encode %{
8382 __ movq($dst$$Address, $src$$Register);
8383 %}
8384 ins_pipe(ialu_mem_reg);
8385 %}
8386
8387 instruct storeSSP(stackSlotP dst, rRegP src)
8388 %{
8389 match(Set dst src);
8390
8391 ins_cost(100);
8392 format %{ "movq $dst, $src\t# ptr stk" %}
8393 ins_encode %{
8394 __ movq($dst$$Address, $src$$Register);
8395 %}
8396 ins_pipe(ialu_mem_reg);
8397 %}
8398
8399 instruct storeSSF(stackSlotF dst, regF src)
8400 %{
8401 match(Set dst src);
8402
8403 ins_cost(95); // XXX
8404 format %{ "movss $dst, $src\t# float stk" %}
8405 ins_encode %{
8406 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8407 %}
8408 ins_pipe(pipe_slow); // XXX
8409 %}
8410
8411 instruct storeSSD(stackSlotD dst, regD src)
8412 %{
8413 match(Set dst src);
8414
8415 ins_cost(95); // XXX
8416 format %{ "movsd $dst, $src\t# double stk" %}
8417 ins_encode %{
8418 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8419 %}
8420 ins_pipe(pipe_slow); // XXX
8421 %}
8422
8423 instruct cacheWB(indirect addr)
8424 %{
8425 predicate(VM_Version::supports_data_cache_line_flush());
8426 match(CacheWB addr);
8427
8428 ins_cost(100);
8429 format %{"cache wb $addr" %}
8430 ins_encode %{
8431 assert($addr->index_position() < 0, "should be");
8432 assert($addr$$disp == 0, "should be");
8433 __ cache_wb(Address($addr$$base$$Register, 0));
8434 %}
8435 ins_pipe(pipe_slow); // XXX
8436 %}
8437
8438 instruct cacheWBPreSync()
8439 %{
8440 predicate(VM_Version::supports_data_cache_line_flush());
8441 match(CacheWBPreSync);
8442
8443 ins_cost(100);
8444 format %{"cache wb presync" %}
8445 ins_encode %{
8446 __ cache_wbsync(true);
8447 %}
8448 ins_pipe(pipe_slow); // XXX
8449 %}
8450
8451 instruct cacheWBPostSync()
8452 %{
8453 predicate(VM_Version::supports_data_cache_line_flush());
8454 match(CacheWBPostSync);
8455
8456 ins_cost(100);
8457 format %{"cache wb postsync" %}
8458 ins_encode %{
8459 __ cache_wbsync(false);
8460 %}
8461 ins_pipe(pipe_slow); // XXX
8462 %}
8463
8464 //----------BSWAP Instructions-------------------------------------------------
8465 instruct bytes_reverse_int(rRegI dst) %{
8466 match(Set dst (ReverseBytesI dst));
8467
8468 format %{ "bswapl $dst" %}
8469 ins_encode %{
8470 __ bswapl($dst$$Register);
8471 %}
8472 ins_pipe( ialu_reg );
8473 %}
8474
8475 instruct bytes_reverse_long(rRegL dst) %{
8476 match(Set dst (ReverseBytesL dst));
8477
8478 format %{ "bswapq $dst" %}
8479 ins_encode %{
8480 __ bswapq($dst$$Register);
8481 %}
8482 ins_pipe( ialu_reg);
8483 %}
8484
8485 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8486 match(Set dst (ReverseBytesUS dst));
8487 effect(KILL cr);
8488
8489 format %{ "bswapl $dst\n\t"
8490 "shrl $dst,16\n\t" %}
8491 ins_encode %{
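    // bswap leaves the reversed 16-bit value in the upper half of the
    // register; a logical shift brings it down zero-extended.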
8492 __ bswapl($dst$$Register);
8493 __ shrl($dst$$Register, 16);
8494 %}
8495 ins_pipe( ialu_reg );
8496 %}
8497
8498 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8499 match(Set dst (ReverseBytesS dst));
8500 effect(KILL cr);
8501
8502 format %{ "bswapl $dst\n\t"
8503 "sar $dst,16\n\t" %}
8504 ins_encode %{
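    // Same as above, but an arithmetic shift sign-extends the result.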
8505 __ bswapl($dst$$Register);
8506 __ sarl($dst$$Register, 16);
8507 %}
8508 ins_pipe( ialu_reg );
8509 %}
8510
8511 //---------- Zeros Count Instructions ------------------------------------------
8512
8513 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8514 predicate(UseCountLeadingZerosInstruction);
8515 match(Set dst (CountLeadingZerosI src));
8516 effect(KILL cr);
8517
8518 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8519 ins_encode %{
8520 __ lzcntl($dst$$Register, $src$$Register);
8521 %}
8522 ins_pipe(ialu_reg);
8523 %}
8524
8525 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8526 predicate(UseCountLeadingZerosInstruction);
8527 match(Set dst (CountLeadingZerosI (LoadI src)));
8528 effect(KILL cr);
8529 ins_cost(175);
8530 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8531 ins_encode %{
8532 __ lzcntl($dst$$Register, $src$$Address);
8533 %}
8534 ins_pipe(ialu_reg_mem);
8535 %}
8536
8537 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8538 predicate(!UseCountLeadingZerosInstruction);
8539 match(Set dst (CountLeadingZerosI src));
8540 effect(KILL cr);
8541
8542 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8543 "jnz skip\n\t"
8544 "movl $dst, -1\n"
8545 "skip:\n\t"
8546 "negl $dst\n\t"
8547 "addl $dst, 31" %}
8548 ins_encode %{
8549 Register Rdst = $dst$$Register;
8550 Register Rsrc = $src$$Register;
8551 Label skip;
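    // clz(x) = 31 - bsr(x). For x == 0, bsr sets ZF and we substitute
    // -1, so the final result is 31 - (-1) = 32.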
8552 __ bsrl(Rdst, Rsrc);
8553 __ jccb(Assembler::notZero, skip);
8554 __ movl(Rdst, -1);
8555 __ bind(skip);
8556 __ negl(Rdst);
8557 __ addl(Rdst, BitsPerInt - 1);
8558 %}
8559 ins_pipe(ialu_reg);
8560 %}
8561
8562 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8563 predicate(UseCountLeadingZerosInstruction);
8564 match(Set dst (CountLeadingZerosL src));
8565 effect(KILL cr);
8566
8567 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8568 ins_encode %{
8569 __ lzcntq($dst$$Register, $src$$Register);
8570 %}
8571 ins_pipe(ialu_reg);
8572 %}
8573
8574 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8575 predicate(UseCountLeadingZerosInstruction);
8576 match(Set dst (CountLeadingZerosL (LoadL src)));
8577 effect(KILL cr);
8578 ins_cost(175);
8579 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8580 ins_encode %{
8581 __ lzcntq($dst$$Register, $src$$Address);
8582 %}
8583 ins_pipe(ialu_reg_mem);
8584 %}
8585
8586 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8587 predicate(!UseCountLeadingZerosInstruction);
8588 match(Set dst (CountLeadingZerosL src));
8589 effect(KILL cr);
8590
8591 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8592 "jnz skip\n\t"
8593 "movl $dst, -1\n"
8594 "skip:\n\t"
8595 "negl $dst\n\t"
8596 "addl $dst, 63" %}
8597 ins_encode %{
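// Same scheme as the int variant: nlz = 63 - bsrq index, and the zero-input
// substitution of -1 yields 63 - (-1) = 64 = BitsPerLong.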
8598 Register Rdst = $dst$$Register;
8599 Register Rsrc = $src$$Register;
8600 Label skip;
8601 __ bsrq(Rdst, Rsrc);
8602 __ jccb(Assembler::notZero, skip);
8603 __ movl(Rdst, -1);
8604 __ bind(skip);
8605 __ negl(Rdst);
8606 __ addl(Rdst, BitsPerLong - 1);
8607 %}
8608 ins_pipe(ialu_reg);
8609 %}
8610
8611 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8612 predicate(UseCountTrailingZerosInstruction);
8613 match(Set dst (CountTrailingZerosI src));
8614 effect(KILL cr);
8615
8616 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8617 ins_encode %{
8618 __ tzcntl($dst$$Register, $src$$Register);
8619 %}
8620 ins_pipe(ialu_reg);
8621 %}
8622
8623 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8624 predicate(UseCountTrailingZerosInstruction);
8625 match(Set dst (CountTrailingZerosI (LoadI src)));
8626 effect(KILL cr);
8627 ins_cost(175);
8628 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8629 ins_encode %{
8630 __ tzcntl($dst$$Register, $src$$Address);
8631 %}
8632 ins_pipe(ialu_reg_mem);
8633 %}
8634
8635 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8636 predicate(!UseCountTrailingZerosInstruction);
8637 match(Set dst (CountTrailingZerosI src));
8638 effect(KILL cr);
8639
8640 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8641 "jnz done\n\t"
8642 "movl $dst, 32\n"
8643 "done:" %}
8644 ins_encode %{
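// For nonzero input bsfl already returns the trailing-zero count;
// only the zero case needs fixing up to 32 (BitsPerInt).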
8645 Register Rdst = $dst$$Register;
8646 Label done;
8647 __ bsfl(Rdst, $src$$Register);
8648 __ jccb(Assembler::notZero, done);
8649 __ movl(Rdst, BitsPerInt);
8650 __ bind(done);
8651 %}
8652 ins_pipe(ialu_reg);
8653 %}
8654
8655 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8656 predicate(UseCountTrailingZerosInstruction);
8657 match(Set dst (CountTrailingZerosL src));
8658 effect(KILL cr);
8659
8660 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8661 ins_encode %{
8662 __ tzcntq($dst$$Register, $src$$Register);
8663 %}
8664 ins_pipe(ialu_reg);
8665 %}
8666
8667 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8668 predicate(UseCountTrailingZerosInstruction);
8669 match(Set dst (CountTrailingZerosL (LoadL src)));
8670 effect(KILL cr);
8671 ins_cost(175);
8672 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8673 ins_encode %{
8674 __ tzcntq($dst$$Register, $src$$Address);
8675 %}
8676 ins_pipe(ialu_reg_mem);
8677 %}
8678
8679 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8680 predicate(!UseCountTrailingZerosInstruction);
8681 match(Set dst (CountTrailingZerosL src));
8682 effect(KILL cr);
8683
8684 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8685 "jnz done\n\t"
8686 "movl $dst, 64\n"
8687 "done:" %}
8688 ins_encode %{
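// As in the int variant: bsfq is the trailing-zero count for nonzero input,
// and zero is fixed up to 64 (BitsPerLong).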
8689 Register Rdst = $dst$$Register;
8690 Label done;
8691 __ bsfq(Rdst, $src$$Register);
8692 __ jccb(Assembler::notZero, done);
8693 __ movl(Rdst, BitsPerLong);
8694 __ bind(done);
8695 %}
8696 ins_pipe(ialu_reg);
8697 %}
8698
8699 //--------------- Reverse Operation Instructions ----------------
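// With GFNI, gf2p8affineqb with a constant matrix reverses the bits within
// each byte in one instruction; the fallback does the classic mask-and-shift
// bit swaps in general registers.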
8700 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8701 predicate(!VM_Version::supports_gfni());
8702 match(Set dst (ReverseI src));
8703 effect(TEMP dst, TEMP rtmp, KILL cr);
8704 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8705 ins_encode %{
8706 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8707 %}
8708 ins_pipe( ialu_reg );
8709 %}
8710
8711 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8712 predicate(VM_Version::supports_gfni());
8713 match(Set dst (ReverseI src));
8714 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8715 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8716 ins_encode %{
8717 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8718 %}
8719 ins_pipe( ialu_reg );
8720 %}
8721
8722 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8723 predicate(!VM_Version::supports_gfni());
8724 match(Set dst (ReverseL src));
8725 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8726 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8727 ins_encode %{
8728 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8729 %}
8730 ins_pipe( ialu_reg );
8731 %}
8732
8733 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8734 predicate(VM_Version::supports_gfni());
8735 match(Set dst (ReverseL src));
8736 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8737 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8738 ins_encode %{
8739 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8740 %}
8741 ins_pipe( ialu_reg );
8742 %}
8743
8744 //---------- Population Count Instructions -------------------------------------
8745
8746 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8747 predicate(UsePopCountInstruction);
8748 match(Set dst (PopCountI src));
8749 effect(KILL cr);
8750
8751 format %{ "popcnt $dst, $src" %}
8752 ins_encode %{
8753 __ popcntl($dst$$Register, $src$$Register);
8754 %}
8755 ins_pipe(ialu_reg);
8756 %}
8757
8758 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8759 predicate(UsePopCountInstruction);
8760 match(Set dst (PopCountI (LoadI mem)));
8761 effect(KILL cr);
8762
8763 format %{ "popcnt $dst, $mem" %}
8764 ins_encode %{
8765 __ popcntl($dst$$Register, $mem$$Address);
8766 %}
8767 ins_pipe(ialu_reg);
8768 %}
8769
8770 // Note: Long.bitCount(long) returns an int.
8771 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8772 predicate(UsePopCountInstruction);
8773 match(Set dst (PopCountL src));
8774 effect(KILL cr);
8775
8776 format %{ "popcnt $dst, $src" %}
8777 ins_encode %{
8778 __ popcntq($dst$$Register, $src$$Register);
8779 %}
8780 ins_pipe(ialu_reg);
8781 %}
8782
8783 // Note: Long.bitCount(long) returns an int.
8784 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8785 predicate(UsePopCountInstruction);
8786 match(Set dst (PopCountL (LoadL mem)));
8787 effect(KILL cr);
8788
8789 format %{ "popcnt $dst, $mem" %}
8790 ins_encode %{
8791 __ popcntq($dst$$Register, $mem$$Address);
8792 %}
8793 ins_pipe(ialu_reg);
8794 %}
8795
8796
8797 //----------MemBar Instructions-----------------------------------------------
8798 // Memory barrier flavors
8799
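// x86 is TSO: loads are not reordered with other loads and stores are not
// reordered with other stores, so acquire and release barriers constrain only
// the compiler and emit no code. Only StoreLoad needs a real fence.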
8800 instruct membar_acquire()
8801 %{
8802 match(MemBarAcquire);
8803 match(LoadFence);
8804 ins_cost(0);
8805
8806 size(0);
8807 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8808 ins_encode();
8809 ins_pipe(empty);
8810 %}
8811
8812 instruct membar_acquire_lock()
8813 %{
8814 match(MemBarAcquireLock);
8815 ins_cost(0);
8816
8817 size(0);
8818 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8819 ins_encode();
8820 ins_pipe(empty);
8821 %}
8822
8823 instruct membar_release()
8824 %{
8825 match(MemBarRelease);
8826 match(StoreFence);
8827 ins_cost(0);
8828
8829 size(0);
8830 format %{ "MEMBAR-release ! (empty encoding)" %}
8831 ins_encode();
8832 ins_pipe(empty);
8833 %}
8834
8835 instruct membar_release_lock()
8836 %{
8837 match(MemBarReleaseLock);
8838 ins_cost(0);
8839
8840 size(0);
8841 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8842 ins_encode();
8843 ins_pipe(empty);
8844 %}
8845
8846 instruct membar_volatile(rFlagsReg cr) %{
8847 match(MemBarVolatile);
8848 effect(KILL cr);
8849 ins_cost(400);
8850
8851 format %{
8852 $$template
8853 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8854 %}
8855 ins_encode %{
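// StoreLoad is implemented as a locked read-modify-write of the top of the
// stack, which is a full fence and cheaper than mfence on most CPUs.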
8856 __ membar(Assembler::StoreLoad);
8857 %}
8858 ins_pipe(pipe_slow);
8859 %}
8860
8861 instruct unnecessary_membar_volatile()
8862 %{
8863 match(MemBarVolatile);
8864 predicate(Matcher::post_store_load_barrier(n));
8865 ins_cost(0);
8866
8867 size(0);
8868 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8869 ins_encode();
8870 ins_pipe(empty);
8871 %}
8872
8873 instruct membar_storestore() %{
8874 match(MemBarStoreStore);
8875 match(StoreStoreFence);
8876 ins_cost(0);
8877
8878 size(0);
8879 format %{ "MEMBAR-storestore (empty encoding)" %}
ins_encode();
8881 ins_pipe(empty);
8882 %}
8883
8884 //----------Move Instructions--------------------------------------------------
8885
8886 instruct castX2P(rRegP dst, rRegL src)
8887 %{
8888 match(Set dst (CastX2P src));
8889
8890 format %{ "movq $dst, $src\t# long->ptr" %}
8891 ins_encode %{
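// The move is elided when the allocator assigns dst and src the same register.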
8892 if ($dst$$reg != $src$$reg) {
8893 __ movptr($dst$$Register, $src$$Register);
8894 }
8895 %}
8896 ins_pipe(ialu_reg_reg); // XXX
8897 %}
8898
8899 instruct castI2N(rRegN dst, rRegI src)
8900 %{
8901 match(Set dst (CastI2N src));
8902
8903 format %{ "movq $dst, $src\t# int -> narrow ptr" %}
8904 ins_encode %{
8905 if ($dst$$reg != $src$$reg) {
8906 __ movl($dst$$Register, $src$$Register);
8907 }
8908 %}
8909 ins_pipe(ialu_reg_reg); // XXX
8910 %}
8911
8912 instruct castN2X(rRegL dst, rRegN src)
8913 %{
8914 match(Set dst (CastP2X src));
8915
8916 format %{ "movq $dst, $src\t# ptr -> long" %}
8917 ins_encode %{
8918 if ($dst$$reg != $src$$reg) {
8919 __ movptr($dst$$Register, $src$$Register);
8920 }
8921 %}
8922 ins_pipe(ialu_reg_reg); // XXX
8923 %}
8924
8925 instruct castP2X(rRegL dst, rRegP src)
8926 %{
8927 match(Set dst (CastP2X src));
8928
8929 format %{ "movq $dst, $src\t# ptr -> long" %}
8930 ins_encode %{
8931 if ($dst$$reg != $src$$reg) {
8932 __ movptr($dst$$Register, $src$$Register);
8933 }
8934 %}
8935 ins_pipe(ialu_reg_reg); // XXX
8936 %}
8937
// Convert oop into int for vector alignment masking
8939 instruct convP2I(rRegI dst, rRegP src)
8940 %{
8941 match(Set dst (ConvL2I (CastP2X src)));
8942
8943 format %{ "movl $dst, $src\t# ptr -> int" %}
8944 ins_encode %{
8945 __ movl($dst$$Register, $src$$Register);
8946 %}
8947 ins_pipe(ialu_reg_reg); // XXX
8948 %}
8949
// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4Gb).
8952 instruct convN2I(rRegI dst, rRegN src)
8953 %{
8954 predicate(CompressedOops::shift() == 0);
8955 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8956
8957 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8958 ins_encode %{
8959 __ movl($dst$$Register, $src$$Register);
8960 %}
8961 ins_pipe(ialu_reg_reg); // XXX
8962 %}
8963
8964 // Convert oop pointer into compressed form
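// (a compressed oop is (oop - heap base) >> CompressedOops::shift(); null stays null)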
8965 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8966 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8967 match(Set dst (EncodeP src));
8968 effect(KILL cr);
8969 format %{ "encode_heap_oop $dst,$src" %}
8970 ins_encode %{
8971 Register s = $src$$Register;
8972 Register d = $dst$$Register;
8973 if (s != d) {
8974 __ movq(d, s);
8975 }
8976 __ encode_heap_oop(d);
8977 %}
8978 ins_pipe(ialu_reg_long);
8979 %}
8980
8981 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8982 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8983 match(Set dst (EncodeP src));
8984 effect(KILL cr);
8985 format %{ "encode_heap_oop_not_null $dst,$src" %}
8986 ins_encode %{
8987 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8988 %}
8989 ins_pipe(ialu_reg_long);
8990 %}
8991
8992 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8993 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8994 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8995 match(Set dst (DecodeN src));
8996 effect(KILL cr);
8997 format %{ "decode_heap_oop $dst,$src" %}
8998 ins_encode %{
8999 Register s = $src$$Register;
9000 Register d = $dst$$Register;
9001 if (s != d) {
9002 __ movq(d, s);
9003 }
9004 __ decode_heap_oop(d);
9005 %}
9006 ins_pipe(ialu_reg_long);
9007 %}
9008
9009 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9010 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9011 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9012 match(Set dst (DecodeN src));
9013 effect(KILL cr);
9014 format %{ "decode_heap_oop_not_null $dst,$src" %}
9015 ins_encode %{
9016 Register s = $src$$Register;
9017 Register d = $dst$$Register;
9018 if (s != d) {
9019 __ decode_heap_oop_not_null(d, s);
9020 } else {
9021 __ decode_heap_oop_not_null(d);
9022 }
9023 %}
9024 ins_pipe(ialu_reg_long);
9025 %}
9026
9027 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9028 match(Set dst (EncodePKlass src));
9029 effect(TEMP dst, KILL cr);
9030 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9031 ins_encode %{
9032 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9033 %}
9034 ins_pipe(ialu_reg_long);
9035 %}
9036
9037 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9038 match(Set dst (DecodeNKlass src));
9039 effect(TEMP dst, KILL cr);
9040 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9041 ins_encode %{
9042 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9043 %}
9044 ins_pipe(ialu_reg_long);
9045 %}
9046
9047 //----------Conditional Move---------------------------------------------------
9048 // Jump
9049 // dummy instruction for generating temp registers
9050 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9051 match(Jump (LShiftL switch_val shift));
9052 ins_cost(350);
9053 predicate(false);
9054 effect(TEMP dest);
9055
9056 format %{ "leaq $dest, [$constantaddress]\n\t"
9057 "jmp [$dest + $switch_val << $shift]\n\t" %}
9058 ins_encode %{
9059 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9060 // to do that and the compiler is using that register as one it can allocate.
9061 // So we build it all by hand.
9062 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9063 // ArrayAddress dispatch(table, index);
9064 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9065 __ lea($dest$$Register, $constantaddress);
9066 __ jmp(dispatch);
9067 %}
9068 ins_pipe(pipe_jmp);
9069 %}
9070
9071 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9072 match(Jump (AddL (LShiftL switch_val shift) offset));
9073 ins_cost(350);
9074 effect(TEMP dest);
9075
9076 format %{ "leaq $dest, [$constantaddress]\n\t"
9077 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9078 ins_encode %{
9079 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9080 // to do that and the compiler is using that register as one it can allocate.
9081 // So we build it all by hand.
9082 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9083 // ArrayAddress dispatch(table, index);
9084 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9085 __ lea($dest$$Register, $constantaddress);
9086 __ jmp(dispatch);
9087 %}
9088 ins_pipe(pipe_jmp);
9089 %}
9090
9091 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9092 match(Jump switch_val);
9093 ins_cost(350);
9094 effect(TEMP dest);
9095
9096 format %{ "leaq $dest, [$constantaddress]\n\t"
9097 "jmp [$dest + $switch_val]\n\t" %}
9098 ins_encode %{
9099 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9100 // to do that and the compiler is using that register as one it can allocate.
9101 // So we build it all by hand.
9102 // Address index(noreg, switch_reg, Address::times_1);
9103 // ArrayAddress dispatch(table, index);
9104 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9105 __ lea($dest$$Register, $constantaddress);
9106 __ jmp(dispatch);
9107 %}
9108 ins_pipe(pipe_jmp);
9109 %}
9110
9111 // Conditional move
9112 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9113 %{
9114 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9115 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9116
9117 ins_cost(100); // XXX
9118 format %{ "setbn$cop $dst\t# signed, int" %}
9119 ins_encode %{
9120 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9121 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9122 %}
9123 ins_pipe(ialu_reg);
9124 %}
9125
9126 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9127 %{
9128 predicate(!UseAPX);
9129 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9130
9131 ins_cost(200); // XXX
9132 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9133 ins_encode %{
9134 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9135 %}
9136 ins_pipe(pipe_cmov_reg);
9137 %}
9138
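// APX NDD (new data destination) forms take two sources and write a separate
// destination, so the allocator need not tie dst to the first source the way
// the two-operand cmov forms require.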
9139 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9140 %{
9141 predicate(UseAPX);
9142 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9143
9144 ins_cost(200);
9145 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9146 ins_encode %{
9147 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9148 %}
9149 ins_pipe(pipe_cmov_reg);
9150 %}
9151
9152 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9153 %{
9154 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9155 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9156
9157 ins_cost(100); // XXX
9158 format %{ "setbn$cop $dst\t# unsigned, int" %}
9159 ins_encode %{
9160 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9161 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9162 %}
9163 ins_pipe(ialu_reg);
9164 %}
9165
9166 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9167 predicate(!UseAPX);
9168 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9169
9170 ins_cost(200); // XXX
9171 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9172 ins_encode %{
9173 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9174 %}
9175 ins_pipe(pipe_cmov_reg);
9176 %}
9177
9178 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9179 predicate(UseAPX);
9180 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9181
9182 ins_cost(200);
9183 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9184 ins_encode %{
9185 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9186 %}
9187 ins_pipe(pipe_cmov_reg);
9188 %}
9189
9190 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9191 %{
9192 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9193 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9194
9195 ins_cost(100); // XXX
9196 format %{ "setbn$cop $dst\t# unsigned, int" %}
9197 ins_encode %{
9198 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9199 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9200 %}
9201 ins_pipe(ialu_reg);
9202 %}
9203
9204 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9205 predicate(!UseAPX);
9206 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9207 ins_cost(200);
9208 expand %{
9209 cmovI_regU(cop, cr, dst, src);
9210 %}
9211 %}
9212
9213 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9214 predicate(UseAPX);
9215 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9216 ins_cost(200);
9217 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9218 ins_encode %{
9219 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9220 %}
9221 ins_pipe(pipe_cmov_reg);
9222 %}
9223
9224 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9225 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9226 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9227
9228 ins_cost(200); // XXX
9229 format %{ "cmovpl $dst, $src\n\t"
9230 "cmovnel $dst, $src" %}
9231 ins_encode %{
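// Flags come from a ucomiss/ucomisd-style compare, where an unordered (NaN)
// result sets ZF and PF; cmovne alone would treat NaN != NaN as false, so
// cmovp picks up the unordered case first.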
9232 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9233 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9234 %}
9235 ins_pipe(pipe_cmov_reg);
9236 %}
9237
9238 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9239 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9240 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9241 effect(TEMP dst);
9242
9243 ins_cost(200);
9244 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9245 "cmovnel $dst, $src2" %}
9246 ins_encode %{
9247 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9248 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9249 %}
9250 ins_pipe(pipe_cmov_reg);
9251 %}
9252
9253 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9254 // inputs of the CMove
9255 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9256 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9257 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9258 effect(TEMP dst);
9259
9260 ins_cost(200); // XXX
9261 format %{ "cmovpl $dst, $src\n\t"
9262 "cmovnel $dst, $src" %}
9263 ins_encode %{
9264 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9265 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9266 %}
9267 ins_pipe(pipe_cmov_reg);
9268 %}
9269
// This special handling is needed only for eq/neq comparisons: NaN == NaN is
// false, and the parity flag is set if either operand is NaN.
9272 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9273 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9274 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9275 effect(TEMP dst);
9276
9277 ins_cost(200);
9278 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9279 "cmovnel $dst, $src2" %}
9280 ins_encode %{
9281 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9282 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9283 %}
9284 ins_pipe(pipe_cmov_reg);
9285 %}
9286
9287 // Conditional move
9288 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9289 predicate(!UseAPX);
9290 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9291
9292 ins_cost(250); // XXX
9293 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9294 ins_encode %{
9295 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9296 %}
9297 ins_pipe(pipe_cmov_mem);
9298 %}
9299
9300 // Conditional move
9301 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9302 %{
9303 predicate(UseAPX);
9304 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9305
9306 ins_cost(250);
9307 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9308 ins_encode %{
9309 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9310 %}
9311 ins_pipe(pipe_cmov_mem);
9312 %}
9313
9314 // Conditional move
9315 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9316 %{
9317 predicate(!UseAPX);
9318 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9319
9320 ins_cost(250); // XXX
9321 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9322 ins_encode %{
9323 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9324 %}
9325 ins_pipe(pipe_cmov_mem);
9326 %}
9327
9328 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9329 predicate(!UseAPX);
9330 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9331 ins_cost(250);
9332 expand %{
9333 cmovI_memU(cop, cr, dst, src);
9334 %}
9335 %}
9336
9337 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9338 %{
9339 predicate(UseAPX);
9340 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9341
9342 ins_cost(250);
9343 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9344 ins_encode %{
9345 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9346 %}
9347 ins_pipe(pipe_cmov_mem);
9348 %}
9349
9350 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9351 %{
9352 predicate(UseAPX);
9353 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9354 ins_cost(250);
9355 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9356 ins_encode %{
9357 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9358 %}
9359 ins_pipe(pipe_cmov_mem);
9360 %}
9361
9362 // Conditional move
9363 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9364 %{
9365 predicate(!UseAPX);
9366 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9367
9368 ins_cost(200); // XXX
9369 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9370 ins_encode %{
9371 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9372 %}
9373 ins_pipe(pipe_cmov_reg);
9374 %}
9375
9376 // Conditional move ndd
9377 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9378 %{
9379 predicate(UseAPX);
9380 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9381
9382 ins_cost(200);
9383 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9384 ins_encode %{
9385 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9386 %}
9387 ins_pipe(pipe_cmov_reg);
9388 %}
9389
9390 // Conditional move
9391 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9392 %{
9393 predicate(!UseAPX);
9394 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9395
9396 ins_cost(200); // XXX
9397 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9398 ins_encode %{
9399 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9400 %}
9401 ins_pipe(pipe_cmov_reg);
9402 %}
9403
9404 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9405 predicate(!UseAPX);
9406 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9407 ins_cost(200);
9408 expand %{
9409 cmovN_regU(cop, cr, dst, src);
9410 %}
9411 %}
9412
9413 // Conditional move ndd
9414 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9415 %{
9416 predicate(UseAPX);
9417 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9418
9419 ins_cost(200);
9420 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9421 ins_encode %{
9422 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9423 %}
9424 ins_pipe(pipe_cmov_reg);
9425 %}
9426
9427 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9428 predicate(UseAPX);
9429 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9430 ins_cost(200);
9431 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9432 ins_encode %{
9433 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9434 %}
9435 ins_pipe(pipe_cmov_reg);
9436 %}
9437
9438 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9439 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9440 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9441
9442 ins_cost(200); // XXX
9443 format %{ "cmovpl $dst, $src\n\t"
9444 "cmovnel $dst, $src" %}
9445 ins_encode %{
9446 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9447 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9448 %}
9449 ins_pipe(pipe_cmov_reg);
9450 %}
9451
9452 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9453 // inputs of the CMove
9454 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9455 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9456 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9457
9458 ins_cost(200); // XXX
9459 format %{ "cmovpl $dst, $src\n\t"
9460 "cmovnel $dst, $src" %}
9461 ins_encode %{
9462 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9463 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9464 %}
9465 ins_pipe(pipe_cmov_reg);
9466 %}
9467
9468 // Conditional move
9469 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9470 %{
9471 predicate(!UseAPX);
9472 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9473
9474 ins_cost(200); // XXX
9475 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9476 ins_encode %{
9477 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9478 %}
9479 ins_pipe(pipe_cmov_reg); // XXX
9480 %}
9481
9482 // Conditional move ndd
9483 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9484 %{
9485 predicate(UseAPX);
9486 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9487
9488 ins_cost(200);
9489 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9490 ins_encode %{
9491 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9492 %}
9493 ins_pipe(pipe_cmov_reg);
9494 %}
9495
9496 // Conditional move
9497 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9498 %{
9499 predicate(!UseAPX);
9500 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9501
9502 ins_cost(200); // XXX
9503 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9504 ins_encode %{
9505 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9506 %}
9507 ins_pipe(pipe_cmov_reg); // XXX
9508 %}
9509
9510 // Conditional move ndd
9511 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9512 %{
9513 predicate(UseAPX);
9514 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9515
9516 ins_cost(200);
9517 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9518 ins_encode %{
9519 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9520 %}
9521 ins_pipe(pipe_cmov_reg);
9522 %}
9523
9524 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9525 predicate(!UseAPX);
9526 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9527 ins_cost(200);
9528 expand %{
9529 cmovP_regU(cop, cr, dst, src);
9530 %}
9531 %}
9532
9533 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9534 predicate(UseAPX);
9535 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9536 ins_cost(200);
9537 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9538 ins_encode %{
9539 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9540 %}
9541 ins_pipe(pipe_cmov_reg);
9542 %}
9543
9544 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9545 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9546 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9547
9548 ins_cost(200); // XXX
9549 format %{ "cmovpq $dst, $src\n\t"
9550 "cmovneq $dst, $src" %}
9551 ins_encode %{
9552 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9553 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9554 %}
9555 ins_pipe(pipe_cmov_reg);
9556 %}
9557
9558 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9559 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9560 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9561 effect(TEMP dst);
9562
9563 ins_cost(200);
9564 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9565 "cmovneq $dst, $src2" %}
9566 ins_encode %{
9567 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9568 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9569 %}
9570 ins_pipe(pipe_cmov_reg);
9571 %}
9572
9573 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9574 // inputs of the CMove
9575 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9576 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9577 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9578
9579 ins_cost(200); // XXX
9580 format %{ "cmovpq $dst, $src\n\t"
9581 "cmovneq $dst, $src" %}
9582 ins_encode %{
9583 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9584 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9585 %}
9586 ins_pipe(pipe_cmov_reg);
9587 %}
9588
9589 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9590 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9591 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9592 effect(TEMP dst);
9593
9594 ins_cost(200);
9595 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9596 "cmovneq $dst, $src2" %}
9597 ins_encode %{
9598 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9599 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9600 %}
9601 ins_pipe(pipe_cmov_reg);
9602 %}
9603
9604 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9605 %{
9606 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9607 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9608
9609 ins_cost(100); // XXX
9610 format %{ "setbn$cop $dst\t# signed, long" %}
9611 ins_encode %{
9612 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9613 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9614 %}
9615 ins_pipe(ialu_reg);
9616 %}
9617
9618 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9619 %{
9620 predicate(!UseAPX);
9621 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9622
9623 ins_cost(200); // XXX
9624 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9625 ins_encode %{
9626 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9627 %}
9628 ins_pipe(pipe_cmov_reg); // XXX
9629 %}
9630
9631 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9632 %{
9633 predicate(UseAPX);
9634 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9635
9636 ins_cost(200);
9637 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9638 ins_encode %{
9639 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9640 %}
9641 ins_pipe(pipe_cmov_reg);
9642 %}
9643
9644 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9645 %{
9646 predicate(!UseAPX);
9647 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9648
9649 ins_cost(200); // XXX
9650 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9651 ins_encode %{
9652 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9653 %}
9654 ins_pipe(pipe_cmov_mem); // XXX
9655 %}
9656
9657 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9658 %{
9659 predicate(UseAPX);
9660 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9661
9662 ins_cost(200);
9663 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9664 ins_encode %{
9665 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9666 %}
9667 ins_pipe(pipe_cmov_mem);
9668 %}
9669
9670 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9671 %{
9672 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9673 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9674
9675 ins_cost(100); // XXX
9676 format %{ "setbn$cop $dst\t# unsigned, long" %}
9677 ins_encode %{
9678 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9679 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9680 %}
9681 ins_pipe(ialu_reg);
9682 %}
9683
9684 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9685 %{
9686 predicate(!UseAPX);
9687 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9688
9689 ins_cost(200); // XXX
9690 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9691 ins_encode %{
9692 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9693 %}
9694 ins_pipe(pipe_cmov_reg); // XXX
9695 %}
9696
9697 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9698 %{
9699 predicate(UseAPX);
9700 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9701
9702 ins_cost(200);
9703 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9704 ins_encode %{
9705 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9706 %}
9707 ins_pipe(pipe_cmov_reg);
9708 %}
9709
9710 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9711 %{
9712 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9713 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9714
9715 ins_cost(100); // XXX
9716 format %{ "setbn$cop $dst\t# unsigned, long" %}
9717 ins_encode %{
9718 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9719 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9720 %}
9721 ins_pipe(ialu_reg);
9722 %}
9723
9724 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9725 predicate(!UseAPX);
9726 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9727 ins_cost(200);
9728 expand %{
9729 cmovL_regU(cop, cr, dst, src);
9730 %}
9731 %}
9732
9733 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9734 %{
9735 predicate(UseAPX);
9736 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9737 ins_cost(200);
9738 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9739 ins_encode %{
9740 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9741 %}
9742 ins_pipe(pipe_cmov_reg);
9743 %}
9744
9745 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9746 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9747 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9748
9749 ins_cost(200); // XXX
9750 format %{ "cmovpq $dst, $src\n\t"
9751 "cmovneq $dst, $src" %}
9752 ins_encode %{
9753 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9754 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9755 %}
9756 ins_pipe(pipe_cmov_reg);
9757 %}
9758
9759 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9760 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9761 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9762 effect(TEMP dst);
9763
9764 ins_cost(200);
9765 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9766 "cmovneq $dst, $src2" %}
9767 ins_encode %{
9768 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9769 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9770 %}
9771 ins_pipe(pipe_cmov_reg);
9772 %}
9773
9774 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9775 // inputs of the CMove
9776 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9777 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9778 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9779
9780 ins_cost(200); // XXX
9781 format %{ "cmovpq $dst, $src\n\t"
9782 "cmovneq $dst, $src" %}
9783 ins_encode %{
9784 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9785 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9786 %}
9787 ins_pipe(pipe_cmov_reg);
9788 %}
9789
9790 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9791 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9792 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9793 effect(TEMP dst);
9794
9795 ins_cost(200);
9796 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9797 "cmovneq $dst, $src2" %}
9798 ins_encode %{
9799 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9800 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9801 %}
9802 ins_pipe(pipe_cmov_reg);
9803 %}
9804
9805 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9806 %{
9807 predicate(!UseAPX);
9808 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9809
9810 ins_cost(200); // XXX
9811 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9812 ins_encode %{
9813 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9814 %}
9815 ins_pipe(pipe_cmov_mem); // XXX
9816 %}
9817
9818 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9819 predicate(!UseAPX);
9820 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9821 ins_cost(200);
9822 expand %{
9823 cmovL_memU(cop, cr, dst, src);
9824 %}
9825 %}
9826
9827 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9828 %{
9829 predicate(UseAPX);
9830 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9831
9832 ins_cost(200);
9833 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9834 ins_encode %{
9835 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9836 %}
9837 ins_pipe(pipe_cmov_mem);
9838 %}
9839
9840 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9841 %{
9842 predicate(UseAPX);
9843 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9844 ins_cost(200);
9845 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9846 ins_encode %{
9847 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9848 %}
9849 ins_pipe(pipe_cmov_mem);
9850 %}
9851
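// There is no cmov for XMM registers, so floating-point conditional moves
// branch around a register-to-register move instead.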
9852 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9853 %{
9854 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9855
9856 ins_cost(200); // XXX
9857 format %{ "jn$cop skip\t# signed cmove float\n\t"
9858 "movss $dst, $src\n"
9859 "skip:" %}
9860 ins_encode %{
9861 Label Lskip;
9862 // Invert sense of branch from sense of CMOV
9863 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9864 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9865 __ bind(Lskip);
9866 %}
9867 ins_pipe(pipe_slow);
9868 %}
9869
9870 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9871 %{
9872 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9873
9874 ins_cost(200); // XXX
9875 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9876 "movss $dst, $src\n"
9877 "skip:" %}
9878 ins_encode %{
9879 Label Lskip;
9880 // Invert sense of branch from sense of CMOV
9881 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9882 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9883 __ bind(Lskip);
9884 %}
9885 ins_pipe(pipe_slow);
9886 %}
9887
9888 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9889 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9890 ins_cost(200);
9891 expand %{
9892 cmovF_regU(cop, cr, dst, src);
9893 %}
9894 %}
9895
9896 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9897 %{
9898 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9899
9900 ins_cost(200); // XXX
9901 format %{ "jn$cop skip\t# signed cmove double\n\t"
9902 "movsd $dst, $src\n"
9903 "skip:" %}
9904 ins_encode %{
9905 Label Lskip;
9906 // Invert sense of branch from sense of CMOV
9907 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9908 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9909 __ bind(Lskip);
9910 %}
9911 ins_pipe(pipe_slow);
9912 %}
9913
9914 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9915 %{
9916 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9917
9918 ins_cost(200); // XXX
9919 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9920 "movsd $dst, $src\n"
9921 "skip:" %}
9922 ins_encode %{
9923 Label Lskip;
9924 // Invert sense of branch from sense of CMOV
9925 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9926 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9927 __ bind(Lskip);
9928 %}
9929 ins_pipe(pipe_slow);
9930 %}
9931
9932 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9933 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9934 ins_cost(200);
9935 expand %{
9936 cmovD_regU(cop, cr, dst, src);
9937 %}
9938 %}
9939
9940 //----------Arithmetic Instructions--------------------------------------------
9941 //----------Addition Instructions----------------------------------------------
9942
9943 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9944 %{
9945 predicate(!UseAPX);
9946 match(Set dst (AddI dst src));
9947 effect(KILL cr);
9948 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9949 format %{ "addl $dst, $src\t# int" %}
9950 ins_encode %{
9951 __ addl($dst$$Register, $src$$Register);
9952 %}
9953 ins_pipe(ialu_reg_reg);
9954 %}
9955
9956 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9957 %{
9958 predicate(UseAPX);
9959 match(Set dst (AddI src1 src2));
9960 effect(KILL cr);
9961 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
9962
9963 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9964 ins_encode %{
9965 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9966 %}
9967 ins_pipe(ialu_reg_reg);
9968 %}
9969
9970 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9971 %{
9972 predicate(!UseAPX);
9973 match(Set dst (AddI dst src));
9974 effect(KILL cr);
9975 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9976
9977 format %{ "addl $dst, $src\t# int" %}
9978 ins_encode %{
9979 __ addl($dst$$Register, $src$$constant);
9980 %}
9981 ins_pipe( ialu_reg );
9982 %}
9983
9984 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9985 %{
9986 predicate(UseAPX);
9987 match(Set dst (AddI src1 src2));
9988 effect(KILL cr);
9989 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
9990
9991 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9992 ins_encode %{
9993 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9994 %}
9995 ins_pipe( ialu_reg );
9996 %}
9997
9998 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9999 %{
10000 predicate(UseAPX);
10001 match(Set dst (AddI (LoadI src1) src2));
10002 effect(KILL cr);
10003 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10004
10005 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10006 ins_encode %{
10007 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10008 %}
10009 ins_pipe( ialu_reg );
10010 %}
10011
10012 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10013 %{
10014 predicate(!UseAPX);
10015 match(Set dst (AddI dst (LoadI src)));
10016 effect(KILL cr);
10017 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10018
10019 ins_cost(150); // XXX
10020 format %{ "addl $dst, $src\t# int" %}
10021 ins_encode %{
10022 __ addl($dst$$Register, $src$$Address);
10023 %}
10024 ins_pipe(ialu_reg_mem);
10025 %}
10026
10027 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10028 %{
10029 predicate(UseAPX);
10030 match(Set dst (AddI src1 (LoadI src2)));
10031 effect(KILL cr);
10032 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10033
10034 ins_cost(150);
10035 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10036 ins_encode %{
10037 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10038 %}
10039 ins_pipe(ialu_reg_mem);
10040 %}
10041
10042 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10043 %{
10044 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10045 effect(KILL cr);
10046 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10047
10048 ins_cost(150); // XXX
10049 format %{ "addl $dst, $src\t# int" %}
10050 ins_encode %{
10051 __ addl($dst$$Address, $src$$Register);
10052 %}
10053 ins_pipe(ialu_mem_reg);
10054 %}
10055
10056 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10057 %{
10058 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10059 effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

10063 ins_cost(125); // XXX
10064 format %{ "addl $dst, $src\t# int" %}
10065 ins_encode %{
10066 __ addl($dst$$Address, $src$$constant);
10067 %}
10068 ins_pipe(ialu_mem_imm);
10069 %}
10070
10071 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10072 %{
10073 predicate(!UseAPX && UseIncDec);
10074 match(Set dst (AddI dst src));
10075 effect(KILL cr);
10076
10077 format %{ "incl $dst\t# int" %}
10078 ins_encode %{
10079 __ incrementl($dst$$Register);
10080 %}
10081 ins_pipe(ialu_reg);
10082 %}
10083
10084 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10085 %{
10086 predicate(UseAPX && UseIncDec);
10087 match(Set dst (AddI src val));
10088 effect(KILL cr);
10089 flag(PD::Flag_ndd_demotable);
10090
10091 format %{ "eincl $dst, $src\t# int ndd" %}
10092 ins_encode %{
10093 __ eincl($dst$$Register, $src$$Register, false);
10094 %}
10095 ins_pipe(ialu_reg);
10096 %}
10097
10098 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10099 %{
10100 predicate(UseAPX && UseIncDec);
10101 match(Set dst (AddI (LoadI src) val));
10102 effect(KILL cr);
10103
10104 format %{ "eincl $dst, $src\t# int ndd" %}
10105 ins_encode %{
10106 __ eincl($dst$$Register, $src$$Address, false);
10107 %}
10108 ins_pipe(ialu_reg);
10109 %}
10110
10111 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10112 %{
10113 predicate(UseIncDec);
10114 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10115 effect(KILL cr);
10116
10117 ins_cost(125); // XXX
10118 format %{ "incl $dst\t# int" %}
10119 ins_encode %{
10120 __ incrementl($dst$$Address);
10121 %}
10122 ins_pipe(ialu_mem_imm);
10123 %}
10124
10125 // XXX why does that use AddI
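// (C2 canonicalizes SubI(x, 1) into AddI(x, -1), so decrements arrive here as AddI.)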
10126 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10127 %{
10128 predicate(!UseAPX && UseIncDec);
10129 match(Set dst (AddI dst src));
10130 effect(KILL cr);
10131
10132 format %{ "decl $dst\t# int" %}
10133 ins_encode %{
10134 __ decrementl($dst$$Register);
10135 %}
10136 ins_pipe(ialu_reg);
10137 %}
10138
10139 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10140 %{
10141 predicate(UseAPX && UseIncDec);
10142 match(Set dst (AddI src val));
10143 effect(KILL cr);
10144 flag(PD::Flag_ndd_demotable);
10145
10146 format %{ "edecl $dst, $src\t# int ndd" %}
10147 ins_encode %{
10148 __ edecl($dst$$Register, $src$$Register, false);
10149 %}
10150 ins_pipe(ialu_reg);
10151 %}
10152
10153 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10154 %{
10155 predicate(UseAPX && UseIncDec);
10156 match(Set dst (AddI (LoadI src) val));
10157 effect(KILL cr);
10158
10159 format %{ "edecl $dst, $src\t# int ndd" %}
10160 ins_encode %{
10161 __ edecl($dst$$Register, $src$$Address, false);
10162 %}
10163 ins_pipe(ialu_reg);
10164 %}
10165
10166 // XXX why does that use AddI
10167 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10168 %{
10169 predicate(UseIncDec);
10170 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10171 effect(KILL cr);
10172
10173 ins_cost(125); // XXX
10174 format %{ "decl $dst\t# int" %}
10175 ins_encode %{
10176 __ decrementl($dst$$Address);
10177 %}
10178 ins_pipe(ialu_mem_imm);
10179 %}
10180
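// On CPUs with fast 2- and 3-operand LEA, shift-add and add-add patterns fold
// into a single leal, which also leaves the flags untouched.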
10181 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10182 %{
10183 predicate(VM_Version::supports_fast_2op_lea());
10184 match(Set dst (AddI (LShiftI index scale) disp));
10185
10186 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10187 ins_encode %{
10188 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10189 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10190 %}
10191 ins_pipe(ialu_reg_reg);
10192 %}
10193
10194 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10195 %{
10196 predicate(VM_Version::supports_fast_3op_lea());
10197 match(Set dst (AddI (AddI base index) disp));
10198
10199 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10200 ins_encode %{
10201 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10202 %}
10203 ins_pipe(ialu_reg_reg);
10204 %}
10205
10206 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10207 %{
10208 predicate(VM_Version::supports_fast_2op_lea());
10209 match(Set dst (AddI base (LShiftI index scale)));
10210
10211 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10212 ins_encode %{
10213 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10214 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10215 %}
10216 ins_pipe(ialu_reg_reg);
10217 %}
10218
10219 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10220 %{
10221 predicate(VM_Version::supports_fast_3op_lea());
10222 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10223
10224 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10225 ins_encode %{
10226 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10227 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10228 %}
10229 ins_pipe(ialu_reg_reg);
10230 %}
10231
10232 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10233 %{
10234 predicate(!UseAPX);
10235 match(Set dst (AddL dst src));
10236 effect(KILL cr);
10237 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10238
10239 format %{ "addq $dst, $src\t# long" %}
10240 ins_encode %{
10241 __ addq($dst$$Register, $src$$Register);
10242 %}
10243 ins_pipe(ialu_reg_reg);
10244 %}
10245
10246 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10247 %{
10248 predicate(UseAPX);
10249 match(Set dst (AddL src1 src2));
10250 effect(KILL cr);
10251 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10252
10253 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10254 ins_encode %{
10255 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10256 %}
10257 ins_pipe(ialu_reg_reg);
10258 %}
10259
10260 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10261 %{
10262 predicate(!UseAPX);
10263 match(Set dst (AddL dst src));
10264 effect(KILL cr);
10265 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10266
10267 format %{ "addq $dst, $src\t# long" %}
10268 ins_encode %{
10269 __ addq($dst$$Register, $src$$constant);
10270 %}
10271 ins_pipe( ialu_reg );
10272 %}
10273
10274 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10275 %{
10276 predicate(UseAPX);
10277 match(Set dst (AddL src1 src2));
10278 effect(KILL cr);
10279 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
10280
10281 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10282 ins_encode %{
10283 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10284 %}
10285 ins_pipe( ialu_reg );
10286 %}
10287
10288 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10289 %{
10290 predicate(UseAPX);
10291 match(Set dst (AddL (LoadL src1) src2));
10292 effect(KILL cr);
10293 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10294
10295 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10296 ins_encode %{
10297 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10298 %}
10299 ins_pipe( ialu_reg );
10300 %}
10301
10302 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10303 %{
10304 predicate(!UseAPX);
10305 match(Set dst (AddL dst (LoadL src)));
10306 effect(KILL cr);
10307 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10308
10309 ins_cost(150); // XXX
10310 format %{ "addq $dst, $src\t# long" %}
10311 ins_encode %{
10312 __ addq($dst$$Register, $src$$Address);
10313 %}
10314 ins_pipe(ialu_reg_mem);
10315 %}
10316
10317 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10318 %{
10319 predicate(UseAPX);
10320 match(Set dst (AddL src1 (LoadL src2)));
10321 effect(KILL cr);
10322 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10323
10324 ins_cost(150);
10325 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10326 ins_encode %{
10327 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10328 %}
10329 ins_pipe(ialu_reg_mem);
10330 %}
10331
10332 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10333 %{
10334 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10335 effect(KILL cr);
10336 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10337
10338 ins_cost(150); // XXX
10339 format %{ "addq $dst, $src\t# long" %}
10340 ins_encode %{
10341 __ addq($dst$$Address, $src$$Register);
10342 %}
10343 ins_pipe(ialu_mem_reg);
10344 %}
10345
10346 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10347 %{
10348 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10349 effect(KILL cr);
10350 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10351
10352 ins_cost(125); // XXX
10353 format %{ "addq $dst, $src\t# long" %}
10354 ins_encode %{
10355 __ addq($dst$$Address, $src$$constant);
10356 %}
10357 ins_pipe(ialu_mem_imm);
10358 %}
10359
10360 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10361 %{
10362 predicate(!UseAPX && UseIncDec);
10363 match(Set dst (AddL dst src));
10364 effect(KILL cr);
10365
10366 format %{ "incq $dst\t# long" %}
10367 ins_encode %{
10368 __ incrementq($dst$$Register);
10369 %}
10370 ins_pipe(ialu_reg);
10371 %}
10372
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10374 %{
10375 predicate(UseAPX && UseIncDec);
10376 match(Set dst (AddL src val));
10377 effect(KILL cr);
10378 flag(PD::Flag_ndd_demotable);
10379
10380 format %{ "eincq $dst, $src\t# long ndd" %}
10381 ins_encode %{
10382 __ eincq($dst$$Register, $src$$Register, false);
10383 %}
10384 ins_pipe(ialu_reg);
10385 %}
10386
10387 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10388 %{
10389 predicate(UseAPX && UseIncDec);
10390 match(Set dst (AddL (LoadL src) val));
10391 effect(KILL cr);
10392
10393 format %{ "eincq $dst, $src\t# long ndd" %}
10394 ins_encode %{
10395 __ eincq($dst$$Register, $src$$Address, false);
10396 %}
10397 ins_pipe(ialu_reg);
10398 %}
10399
10400 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10401 %{
10402 predicate(UseIncDec);
10403 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10404 effect(KILL cr);
10405
10406 ins_cost(125); // XXX
10407 format %{ "incq $dst\t# long" %}
10408 ins_encode %{
10409 __ incrementq($dst$$Address);
10410 %}
10411 ins_pipe(ialu_mem_imm);
10412 %}
10413
// Matches AddL for the same reason as decI_mem above: Ideal rewrites
// x - 1 as AddL(x, -1).
10415 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10416 %{
10417 predicate(!UseAPX && UseIncDec);
10418 match(Set dst (AddL dst src));
10419 effect(KILL cr);
10420
10421 format %{ "decq $dst\t# long" %}
10422 ins_encode %{
10423 __ decrementq($dst$$Register);
10424 %}
10425 ins_pipe(ialu_reg);
10426 %}
10427
10428 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10429 %{
10430 predicate(UseAPX && UseIncDec);
10431 match(Set dst (AddL src val));
10432 effect(KILL cr);
10433 flag(PD::Flag_ndd_demotable);
10434
10435 format %{ "edecq $dst, $src\t# long ndd" %}
10436 ins_encode %{
10437 __ edecq($dst$$Register, $src$$Register, false);
10438 %}
10439 ins_pipe(ialu_reg);
10440 %}
10441
10442 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10443 %{
10444 predicate(UseAPX && UseIncDec);
10445 match(Set dst (AddL (LoadL src) val));
10446 effect(KILL cr);
10447
10448 format %{ "edecq $dst, $src\t# long ndd" %}
10449 ins_encode %{
10450 __ edecq($dst$$Register, $src$$Address, false);
10451 %}
10452 ins_pipe(ialu_reg);
10453 %}
10454
// See the note above decL_rReg: dec matches AddL with an immL_M1 operand.
10456 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10457 %{
10458 predicate(UseIncDec);
10459 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10460 effect(KILL cr);
10461
10462 ins_cost(125); // XXX
10463 format %{ "decq $dst\t# long" %}
10464 ins_encode %{
10465 __ decrementq($dst$$Address);
10466 %}
10467 ins_pipe(ialu_mem_imm);
10468 %}
10469
10470 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10471 %{
10472 predicate(VM_Version::supports_fast_2op_lea());
10473 match(Set dst (AddL (LShiftL index scale) disp));
10474
10475 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10476 ins_encode %{
10477 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10478 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10479 %}
10480 ins_pipe(ialu_reg_reg);
10481 %}
10482
10483 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10484 %{
10485 predicate(VM_Version::supports_fast_3op_lea());
10486 match(Set dst (AddL (AddL base index) disp));
10487
10488 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10489 ins_encode %{
10490 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10491 %}
10492 ins_pipe(ialu_reg_reg);
10493 %}
10494
10495 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10496 %{
10497 predicate(VM_Version::supports_fast_2op_lea());
10498 match(Set dst (AddL base (LShiftL index scale)));
10499
10500 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10501 ins_encode %{
10502 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10503 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10504 %}
10505 ins_pipe(ialu_reg_reg);
10506 %}
10507
10508 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10509 %{
10510 predicate(VM_Version::supports_fast_3op_lea());
10511 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10512
10513 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10514 ins_encode %{
10515 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10516 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10517 %}
10518 ins_pipe(ialu_reg_reg);
10519 %}
10520
10521 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10522 %{
10523 match(Set dst (AddP dst src));
10524 effect(KILL cr);
10525 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10526
10527 format %{ "addq $dst, $src\t# ptr" %}
10528 ins_encode %{
10529 __ addq($dst$$Register, $src$$Register);
10530 %}
10531 ins_pipe(ialu_reg_reg);
10532 %}
10533
10534 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10535 %{
10536 match(Set dst (AddP dst src));
10537 effect(KILL cr);
10538 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10539
10540 format %{ "addq $dst, $src\t# ptr" %}
10541 ins_encode %{
10542 __ addq($dst$$Register, $src$$constant);
10543 %}
10544 ins_pipe( ialu_reg );
10545 %}
10546
10547 // XXX addP mem ops ????
10548
10549 instruct checkCastPP(rRegP dst)
10550 %{
10551 match(Set dst (CheckCastPP dst));
10552
10553 size(0);
10554 format %{ "# checkcastPP of $dst" %}
10555 ins_encode(/* empty encoding */);
10556 ins_pipe(empty);
10557 %}
10558
10559 instruct castPP(rRegP dst)
10560 %{
10561 match(Set dst (CastPP dst));
10562
10563 size(0);
10564 format %{ "# castPP of $dst" %}
10565 ins_encode(/* empty encoding */);
10566 ins_pipe(empty);
10567 %}
10568
10569 instruct castII(rRegI dst)
10570 %{
10571 predicate(VerifyConstraintCasts == 0);
10572 match(Set dst (CastII dst));
10573
10574 size(0);
10575 format %{ "# castII of $dst" %}
10576 ins_encode(/* empty encoding */);
10577 ins_cost(0);
10578 ins_pipe(empty);
10579 %}
10580
10581 instruct castII_checked(rRegI dst, rFlagsReg cr)
10582 %{
10583 predicate(VerifyConstraintCasts > 0);
10584 match(Set dst (CastII dst));
10585
10586 effect(KILL cr);
10587 format %{ "# cast_checked_II $dst" %}
10588 ins_encode %{
10589 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10590 %}
10591 ins_pipe(pipe_slow);
10592 %}
10593
10594 instruct castLL(rRegL dst)
10595 %{
10596 predicate(VerifyConstraintCasts == 0);
10597 match(Set dst (CastLL dst));
10598
10599 size(0);
10600 format %{ "# castLL of $dst" %}
10601 ins_encode(/* empty encoding */);
10602 ins_cost(0);
10603 ins_pipe(empty);
10604 %}
10605
10606 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10607 %{
10608 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10609 match(Set dst (CastLL dst));
10610
10611 effect(KILL cr);
10612 format %{ "# cast_checked_LL $dst" %}
10613 ins_encode %{
10614 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10615 %}
10616 ins_pipe(pipe_slow);
10617 %}
10618
10619 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10620 %{
10621 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10622 match(Set dst (CastLL dst));
10623
10624 effect(KILL cr, TEMP tmp);
10625 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10626 ins_encode %{
10627 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10628 %}
10629 ins_pipe(pipe_slow);
10630 %}
10631
10632 instruct castFF(regF dst)
10633 %{
10634 match(Set dst (CastFF dst));
10635
10636 size(0);
10637 format %{ "# castFF of $dst" %}
10638 ins_encode(/* empty encoding */);
10639 ins_cost(0);
10640 ins_pipe(empty);
10641 %}
10642
10643 instruct castHH(regF dst)
10644 %{
10645 match(Set dst (CastHH dst));
10646
10647 size(0);
10648 format %{ "# castHH of $dst" %}
10649 ins_encode(/* empty encoding */);
10650 ins_cost(0);
10651 ins_pipe(empty);
10652 %}
10653
10654 instruct castDD(regD dst)
10655 %{
10656 match(Set dst (CastDD dst));
10657
10658 size(0);
10659 format %{ "# castDD of $dst" %}
10660 ins_encode(/* empty encoding */);
10661 ins_cost(0);
10662 ins_pipe(empty);
10663 %}
10664
10665 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
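// cmpxchg implicitly compares against rax: on success it stores newval and
// sets ZF, on failure it loads the current memory value into rax. Hence
// oldval is pinned to rax (and killed), and setcc(equal) turns ZF into the
// boolean result.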
10666 instruct compareAndSwapP(rRegI res,
10667 memory mem_ptr,
10668 rax_RegP oldval, rRegP newval,
10669 rFlagsReg cr)
10670 %{
10671 predicate(n->as_LoadStore()->barrier_data() == 0);
10672 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10673 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10674 effect(KILL cr, KILL oldval);
10675
10676 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10677 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10678 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10679 ins_encode %{
10680 __ lock();
10681 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10682 __ setcc(Assembler::equal, $res$$Register);
10683 %}
10684 ins_pipe( pipe_cmpxchg );
10685 %}
10686
10687 instruct compareAndSwapL(rRegI res,
10688 memory mem_ptr,
10689 rax_RegL oldval, rRegL newval,
10690 rFlagsReg cr)
10691 %{
10692 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10693 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10694 effect(KILL cr, KILL oldval);
10695
10696 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10697 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10698 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10699 ins_encode %{
10700 __ lock();
10701 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10702 __ setcc(Assembler::equal, $res$$Register);
10703 %}
10704 ins_pipe( pipe_cmpxchg );
10705 %}
10706
10707 instruct compareAndSwapI(rRegI res,
10708 memory mem_ptr,
10709 rax_RegI oldval, rRegI newval,
10710 rFlagsReg cr)
10711 %{
10712 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10713 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10714 effect(KILL cr, KILL oldval);
10715
10716 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10717 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10718 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10719 ins_encode %{
10720 __ lock();
10721 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10722 __ setcc(Assembler::equal, $res$$Register);
10723 %}
10724 ins_pipe( pipe_cmpxchg );
10725 %}
10726
10727 instruct compareAndSwapB(rRegI res,
10728 memory mem_ptr,
10729 rax_RegI oldval, rRegI newval,
10730 rFlagsReg cr)
10731 %{
10732 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10733 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10734 effect(KILL cr, KILL oldval);
10735
10736 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10737 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10738 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10739 ins_encode %{
10740 __ lock();
10741 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10742 __ setcc(Assembler::equal, $res$$Register);
10743 %}
10744 ins_pipe( pipe_cmpxchg );
10745 %}
10746
10747 instruct compareAndSwapS(rRegI res,
10748 memory mem_ptr,
10749 rax_RegI oldval, rRegI newval,
10750 rFlagsReg cr)
10751 %{
10752 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10753 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10754 effect(KILL cr, KILL oldval);
10755
10756 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10757 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10758 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10759 ins_encode %{
10760 __ lock();
10761 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10762 __ setcc(Assembler::equal, $res$$Register);
10763 %}
10764 ins_pipe( pipe_cmpxchg );
10765 %}
10766
10767 instruct compareAndSwapN(rRegI res,
10768 memory mem_ptr,
10769 rax_RegN oldval, rRegN newval,
10770 rFlagsReg cr) %{
10771 predicate(n->as_LoadStore()->barrier_data() == 0);
10772 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10773 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10774 effect(KILL cr, KILL oldval);
10775
10776 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10777 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10778 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10779 ins_encode %{
10780 __ lock();
10781 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10782 __ setcc(Assembler::equal, $res$$Register);
10783 %}
10784 ins_pipe( pipe_cmpxchg );
10785 %}
10786
10787 instruct compareAndExchangeB(
10788 memory mem_ptr,
10789 rax_RegI oldval, rRegI newval,
10790 rFlagsReg cr)
10791 %{
10792 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10793 effect(KILL cr);
10794
10795 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10796 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10797 ins_encode %{
10798 __ lock();
10799 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10800 %}
10801 ins_pipe( pipe_cmpxchg );
10802 %}
10803
10804 instruct compareAndExchangeS(
10805 memory mem_ptr,
10806 rax_RegI oldval, rRegI newval,
10807 rFlagsReg cr)
10808 %{
10809 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10810 effect(KILL cr);
10811
10812 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10813 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10814 ins_encode %{
10815 __ lock();
10816 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10817 %}
10818 ins_pipe( pipe_cmpxchg );
10819 %}
10820
10821 instruct compareAndExchangeI(
10822 memory mem_ptr,
10823 rax_RegI oldval, rRegI newval,
10824 rFlagsReg cr)
10825 %{
10826 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10827 effect(KILL cr);
10828
10829 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10830 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10831 ins_encode %{
10832 __ lock();
10833 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10834 %}
10835 ins_pipe( pipe_cmpxchg );
10836 %}
10837
10838 instruct compareAndExchangeL(
10839 memory mem_ptr,
10840 rax_RegL oldval, rRegL newval,
10841 rFlagsReg cr)
10842 %{
10843 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10844 effect(KILL cr);
10845
10846 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10847 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10848 ins_encode %{
10849 __ lock();
10850 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10851 %}
10852 ins_pipe( pipe_cmpxchg );
10853 %}
10854
10855 instruct compareAndExchangeN(
10856 memory mem_ptr,
10857 rax_RegN oldval, rRegN newval,
10858 rFlagsReg cr) %{
10859 predicate(n->as_LoadStore()->barrier_data() == 0);
10860 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10861 effect(KILL cr);
10862
10863 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10864 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10865 ins_encode %{
10866 __ lock();
10867 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10868 %}
10869 ins_pipe( pipe_cmpxchg );
10870 %}
10871
10872 instruct compareAndExchangeP(
10873 memory mem_ptr,
10874 rax_RegP oldval, rRegP newval,
10875 rFlagsReg cr)
10876 %{
10877 predicate(n->as_LoadStore()->barrier_data() == 0);
10878 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10879 effect(KILL cr);
10880
10881 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10882 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10883 ins_encode %{
10884 __ lock();
10885 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10886 %}
10887 ins_pipe( pipe_cmpxchg );
10888 %}
10889
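// When the result of GetAndAdd is unused, a plain locked add suffices; the
// xadd forms are only needed when the old value must be returned.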
10890 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10891 predicate(n->as_LoadStore()->result_not_used());
10892 match(Set dummy (GetAndAddB mem add));
10893 effect(KILL cr);
10894 format %{ "addb_lock $mem, $add" %}
10895 ins_encode %{
10896 __ lock();
10897 __ addb($mem$$Address, $add$$Register);
10898 %}
10899 ins_pipe(pipe_cmpxchg);
10900 %}
10901
10902 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10903 predicate(n->as_LoadStore()->result_not_used());
10904 match(Set dummy (GetAndAddB mem add));
10905 effect(KILL cr);
10906 format %{ "addb_lock $mem, $add" %}
10907 ins_encode %{
10908 __ lock();
10909 __ addb($mem$$Address, $add$$constant);
10910 %}
10911 ins_pipe(pipe_cmpxchg);
10912 %}
10913
10914 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10915 predicate(!n->as_LoadStore()->result_not_used());
10916 match(Set newval (GetAndAddB mem newval));
10917 effect(KILL cr);
10918 format %{ "xaddb_lock $mem, $newval" %}
10919 ins_encode %{
10920 __ lock();
10921 __ xaddb($mem$$Address, $newval$$Register);
10922 %}
10923 ins_pipe(pipe_cmpxchg);
10924 %}
10925
10926 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10927 predicate(n->as_LoadStore()->result_not_used());
10928 match(Set dummy (GetAndAddS mem add));
10929 effect(KILL cr);
10930 format %{ "addw_lock $mem, $add" %}
10931 ins_encode %{
10932 __ lock();
10933 __ addw($mem$$Address, $add$$Register);
10934 %}
10935 ins_pipe(pipe_cmpxchg);
10936 %}
10937
10938 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10939 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10940 match(Set dummy (GetAndAddS mem add));
10941 effect(KILL cr);
10942 format %{ "addw_lock $mem, $add" %}
10943 ins_encode %{
10944 __ lock();
10945 __ addw($mem$$Address, $add$$constant);
10946 %}
10947 ins_pipe(pipe_cmpxchg);
10948 %}
10949
10950 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10951 predicate(!n->as_LoadStore()->result_not_used());
10952 match(Set newval (GetAndAddS mem newval));
10953 effect(KILL cr);
10954 format %{ "xaddw_lock $mem, $newval" %}
10955 ins_encode %{
10956 __ lock();
10957 __ xaddw($mem$$Address, $newval$$Register);
10958 %}
10959 ins_pipe(pipe_cmpxchg);
10960 %}
10961
10962 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10963 predicate(n->as_LoadStore()->result_not_used());
10964 match(Set dummy (GetAndAddI mem add));
10965 effect(KILL cr);
10966 format %{ "addl_lock $mem, $add" %}
10967 ins_encode %{
10968 __ lock();
10969 __ addl($mem$$Address, $add$$Register);
10970 %}
10971 ins_pipe(pipe_cmpxchg);
10972 %}
10973
10974 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10975 predicate(n->as_LoadStore()->result_not_used());
10976 match(Set dummy (GetAndAddI mem add));
10977 effect(KILL cr);
10978 format %{ "addl_lock $mem, $add" %}
10979 ins_encode %{
10980 __ lock();
10981 __ addl($mem$$Address, $add$$constant);
10982 %}
10983 ins_pipe(pipe_cmpxchg);
10984 %}
10985
10986 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10987 predicate(!n->as_LoadStore()->result_not_used());
10988 match(Set newval (GetAndAddI mem newval));
10989 effect(KILL cr);
10990 format %{ "xaddl_lock $mem, $newval" %}
10991 ins_encode %{
10992 __ lock();
10993 __ xaddl($mem$$Address, $newval$$Register);
10994 %}
10995 ins_pipe(pipe_cmpxchg);
10996 %}
10997
10998 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10999 predicate(n->as_LoadStore()->result_not_used());
11000 match(Set dummy (GetAndAddL mem add));
11001 effect(KILL cr);
11002 format %{ "addq_lock $mem, $add" %}
11003 ins_encode %{
11004 __ lock();
11005 __ addq($mem$$Address, $add$$Register);
11006 %}
11007 ins_pipe(pipe_cmpxchg);
11008 %}
11009
11010 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11011 predicate(n->as_LoadStore()->result_not_used());
11012 match(Set dummy (GetAndAddL mem add));
11013 effect(KILL cr);
11014 format %{ "addq_lock $mem, $add" %}
11015 ins_encode %{
11016 __ lock();
11017 __ addq($mem$$Address, $add$$constant);
11018 %}
11019 ins_pipe(pipe_cmpxchg);
11020 %}
11021
11022 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11023 predicate(!n->as_LoadStore()->result_not_used());
11024 match(Set newval (GetAndAddL mem newval));
11025 effect(KILL cr);
11026 format %{ "xaddq_lock $mem, $newval" %}
11027 ins_encode %{
11028 __ lock();
11029 __ xaddq($mem$$Address, $newval$$Register);
11030 %}
11031 ins_pipe(pipe_cmpxchg);
11032 %}
11033
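// xchg with a memory operand is implicitly locked on x86 and leaves the
// flags untouched, so these rules need neither a lock() prefix nor a
// KILL cr effect.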
11034 instruct xchgB( memory mem, rRegI newval) %{
11035 match(Set newval (GetAndSetB mem newval));
11036 format %{ "XCHGB $newval,[$mem]" %}
11037 ins_encode %{
11038 __ xchgb($newval$$Register, $mem$$Address);
11039 %}
11040 ins_pipe( pipe_cmpxchg );
11041 %}
11042
11043 instruct xchgS( memory mem, rRegI newval) %{
11044 match(Set newval (GetAndSetS mem newval));
11045 format %{ "XCHGW $newval,[$mem]" %}
11046 ins_encode %{
11047 __ xchgw($newval$$Register, $mem$$Address);
11048 %}
11049 ins_pipe( pipe_cmpxchg );
11050 %}
11051
11052 instruct xchgI( memory mem, rRegI newval) %{
11053 match(Set newval (GetAndSetI mem newval));
11054 format %{ "XCHGL $newval,[$mem]" %}
11055 ins_encode %{
11056 __ xchgl($newval$$Register, $mem$$Address);
11057 %}
11058 ins_pipe( pipe_cmpxchg );
11059 %}
11060
11061 instruct xchgL( memory mem, rRegL newval) %{
11062 match(Set newval (GetAndSetL mem newval));
11063 format %{ "XCHGL $newval,[$mem]" %}
11064 ins_encode %{
11065 __ xchgq($newval$$Register, $mem$$Address);
11066 %}
11067 ins_pipe( pipe_cmpxchg );
11068 %}
11069
11070 instruct xchgP( memory mem, rRegP newval) %{
11071 match(Set newval (GetAndSetP mem newval));
11072 predicate(n->as_LoadStore()->barrier_data() == 0);
11073 format %{ "XCHGQ $newval,[$mem]" %}
11074 ins_encode %{
11075 __ xchgq($newval$$Register, $mem$$Address);
11076 %}
11077 ins_pipe( pipe_cmpxchg );
11078 %}
11079
11080 instruct xchgN( memory mem, rRegN newval) %{
11081 predicate(n->as_LoadStore()->barrier_data() == 0);
11082 match(Set newval (GetAndSetN mem newval));
11083 format %{ "XCHGL $newval,$mem]" %}
11084 ins_encode %{
11085 __ xchgl($newval$$Register, $mem$$Address);
11086 %}
11087 ins_pipe( pipe_cmpxchg );
11088 %}
11089
11090 //----------Abs Instructions-------------------------------------------
11091
11092 // Integer Absolute Instructions
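// Branchless abs: dst = 0 - src sets the flags, then cmov(less) picks src
// back when -src < 0, i.e. when src was positive. E.g. src = 7: dst = -7,
// cmov taken, dst = 7. For min_jint the subtraction overflows, "less" is
// false and dst stays min_jint, matching Java's Math.abs semantics.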
11093 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11094 %{
11095 match(Set dst (AbsI src));
11096 effect(TEMP dst, KILL cr);
11097 format %{ "xorl $dst, $dst\t# abs int\n\t"
11098 "subl $dst, $src\n\t"
11099 "cmovll $dst, $src" %}
11100 ins_encode %{
11101 __ xorl($dst$$Register, $dst$$Register);
11102 __ subl($dst$$Register, $src$$Register);
11103 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11104 %}
11105
11106 ins_pipe(ialu_reg_reg);
11107 %}
11108
11109 // Long Absolute Instructions
11110 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11111 %{
11112 match(Set dst (AbsL src));
11113 effect(TEMP dst, KILL cr);
11114 format %{ "xorl $dst, $dst\t# abs long\n\t"
11115 "subq $dst, $src\n\t"
11116 "cmovlq $dst, $src" %}
11117 ins_encode %{
11118 __ xorl($dst$$Register, $dst$$Register);
11119 __ subq($dst$$Register, $src$$Register);
11120 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11121 %}
11122
11123 ins_pipe(ialu_reg_reg);
11124 %}
11125
11126 //----------Subtraction Instructions-------------------------------------------
11127
11128 // Integer Subtraction Instructions
11129 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11130 %{
11131 predicate(!UseAPX);
11132 match(Set dst (SubI dst src));
11133 effect(KILL cr);
11134 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11135
11136 format %{ "subl $dst, $src\t# int" %}
11137 ins_encode %{
11138 __ subl($dst$$Register, $src$$Register);
11139 %}
11140 ins_pipe(ialu_reg_reg);
11141 %}
11142
11143 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11144 %{
11145 predicate(UseAPX);
11146 match(Set dst (SubI src1 src2));
11147 effect(KILL cr);
11148 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11149
11150 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11151 ins_encode %{
11152 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11153 %}
11154 ins_pipe(ialu_reg_reg);
11155 %}
11156
11157 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11158 %{
11159 predicate(UseAPX);
11160 match(Set dst (SubI src1 src2));
11161 effect(KILL cr);
11162 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11163
11164 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11165 ins_encode %{
11166 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11167 %}
11168 ins_pipe(ialu_reg_reg);
11169 %}
11170
11171 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11172 %{
11173 predicate(UseAPX);
11174 match(Set dst (SubI (LoadI src1) src2));
11175 effect(KILL cr);
11176 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11177
11178 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11179 ins_encode %{
11180 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11181 %}
11182 ins_pipe(ialu_reg_reg);
11183 %}
11184
11185 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11186 %{
11187 predicate(!UseAPX);
11188 match(Set dst (SubI dst (LoadI src)));
11189 effect(KILL cr);
11190 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11191
11192 ins_cost(150);
11193 format %{ "subl $dst, $src\t# int" %}
11194 ins_encode %{
11195 __ subl($dst$$Register, $src$$Address);
11196 %}
11197 ins_pipe(ialu_reg_mem);
11198 %}
11199
11200 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11201 %{
11202 predicate(UseAPX);
11203 match(Set dst (SubI src1 (LoadI src2)));
11204 effect(KILL cr);
11205 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11206
11207 ins_cost(150);
11208 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11209 ins_encode %{
11210 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11211 %}
11212 ins_pipe(ialu_reg_mem);
11213 %}
11214
11215 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11216 %{
11217 predicate(UseAPX);
11218 match(Set dst (SubI (LoadI src1) src2));
11219 effect(KILL cr);
11220 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11221
11222 ins_cost(150);
11223 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11224 ins_encode %{
11225 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11226 %}
11227 ins_pipe(ialu_reg_mem);
11228 %}
11229
11230 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11231 %{
11232 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11233 effect(KILL cr);
11234 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11235
11236 ins_cost(150);
11237 format %{ "subl $dst, $src\t# int" %}
11238 ins_encode %{
11239 __ subl($dst$$Address, $src$$Register);
11240 %}
11241 ins_pipe(ialu_mem_reg);
11242 %}
11243
11244 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11245 %{
11246 predicate(!UseAPX);
11247 match(Set dst (SubL dst src));
11248 effect(KILL cr);
11249 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11250
11251 format %{ "subq $dst, $src\t# long" %}
11252 ins_encode %{
11253 __ subq($dst$$Register, $src$$Register);
11254 %}
11255 ins_pipe(ialu_reg_reg);
11256 %}
11257
11258 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11259 %{
11260 predicate(UseAPX);
11261 match(Set dst (SubL src1 src2));
11262 effect(KILL cr);
11263 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11264
11265 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11266 ins_encode %{
11267 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11268 %}
11269 ins_pipe(ialu_reg_reg);
11270 %}
11271
11272 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11273 %{
11274 predicate(UseAPX);
11275 match(Set dst (SubL src1 src2));
11276 effect(KILL cr);
11277 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11278
11279 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11280 ins_encode %{
11281 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11282 %}
11283 ins_pipe(ialu_reg_reg);
11284 %}
11285
11286 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11287 %{
11288 predicate(UseAPX);
11289 match(Set dst (SubL (LoadL src1) src2));
11290 effect(KILL cr);
11291 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11292
11293 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11294 ins_encode %{
11295 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11296 %}
11297 ins_pipe(ialu_reg_reg);
11298 %}
11299
11300 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11301 %{
11302 predicate(!UseAPX);
11303 match(Set dst (SubL dst (LoadL src)));
11304 effect(KILL cr);
11305 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11306
11307 ins_cost(150);
11308 format %{ "subq $dst, $src\t# long" %}
11309 ins_encode %{
11310 __ subq($dst$$Register, $src$$Address);
11311 %}
11312 ins_pipe(ialu_reg_mem);
11313 %}
11314
11315 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11316 %{
11317 predicate(UseAPX);
11318 match(Set dst (SubL src1 (LoadL src2)));
11319 effect(KILL cr);
11320 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11321
11322 ins_cost(150);
11323 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11324 ins_encode %{
11325 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11326 %}
11327 ins_pipe(ialu_reg_mem);
11328 %}
11329
11330 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11331 %{
11332 predicate(UseAPX);
11333 match(Set dst (SubL (LoadL src1) src2));
11334 effect(KILL cr);
11335 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11336
11337 ins_cost(150);
11338 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11339 ins_encode %{
11340 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11341 %}
11342 ins_pipe(ialu_reg_mem);
11343 %}
11344
11345 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11346 %{
11347 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11348 effect(KILL cr);
11349 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11350
11351 ins_cost(150);
11352 format %{ "subq $dst, $src\t# long" %}
11353 ins_encode %{
11354 __ subq($dst$$Address, $src$$Register);
11355 %}
11356 ins_pipe(ialu_mem_reg);
11357 %}
11358
11359 // Subtract from a pointer
// Ideal represents p - i as AddP(p, SubI(0, i)), which is why this rule
// matches an AddP of a negated int.
11361 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11362 %{
11363 match(Set dst (AddP dst (SubI zero src)));
11364 effect(KILL cr);
11365
11366 format %{ "subq $dst, $src\t# ptr - int" %}
11367 ins_encode %{
11368 __ subq($dst$$Register, $src$$Register);
11369 %}
11370 ins_pipe(ialu_reg_reg);
11371 %}
11372
11373 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11374 %{
11375 predicate(!UseAPX);
11376 match(Set dst (SubI zero dst));
11377 effect(KILL cr);
11378 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11379
11380 format %{ "negl $dst\t# int" %}
11381 ins_encode %{
11382 __ negl($dst$$Register);
11383 %}
11384 ins_pipe(ialu_reg);
11385 %}
11386
11387 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11388 %{
11389 predicate(UseAPX);
11390 match(Set dst (SubI zero src));
11391 effect(KILL cr);
11392 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11393
11394 format %{ "enegl $dst, $src\t# int ndd" %}
11395 ins_encode %{
11396 __ enegl($dst$$Register, $src$$Register, false);
11397 %}
11398 ins_pipe(ialu_reg);
11399 %}
11400
11401 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11402 %{
11403 predicate(!UseAPX);
11404 match(Set dst (NegI dst));
11405 effect(KILL cr);
11406 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11407
11408 format %{ "negl $dst\t# int" %}
11409 ins_encode %{
11410 __ negl($dst$$Register);
11411 %}
11412 ins_pipe(ialu_reg);
11413 %}
11414
11415 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11416 %{
11417 predicate(UseAPX);
11418 match(Set dst (NegI src));
11419 effect(KILL cr);
11420 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11421
11422 format %{ "enegl $dst, $src\t# int ndd" %}
11423 ins_encode %{
11424 __ enegl($dst$$Register, $src$$Register, false);
11425 %}
11426 ins_pipe(ialu_reg);
11427 %}
11428
11429 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11430 %{
11431 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11432 effect(KILL cr);
11433 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11434
11435 format %{ "negl $dst\t# int" %}
11436 ins_encode %{
11437 __ negl($dst$$Address);
11438 %}
11439 ins_pipe(ialu_reg);
11440 %}
11441
11442 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11443 %{
11444 predicate(!UseAPX);
11445 match(Set dst (SubL zero dst));
11446 effect(KILL cr);
11447 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11448
11449 format %{ "negq $dst\t# long" %}
11450 ins_encode %{
11451 __ negq($dst$$Register);
11452 %}
11453 ins_pipe(ialu_reg);
11454 %}
11455
11456 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11457 %{
11458 predicate(UseAPX);
11459 match(Set dst (SubL zero src));
11460 effect(KILL cr);
11461 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11462
11463 format %{ "enegq $dst, $src\t# long ndd" %}
11464 ins_encode %{
11465 __ enegq($dst$$Register, $src$$Register, false);
11466 %}
11467 ins_pipe(ialu_reg);
11468 %}
11469
11470 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11471 %{
11472 predicate(!UseAPX);
11473 match(Set dst (NegL dst));
11474 effect(KILL cr);
11475 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11476
11477 format %{ "negq $dst\t# int" %}
11478 ins_encode %{
11479 __ negq($dst$$Register);
11480 %}
11481 ins_pipe(ialu_reg);
11482 %}
11483
11484 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11485 %{
11486 predicate(UseAPX);
11487 match(Set dst (NegL src));
11488 effect(KILL cr);
11489 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11490
11491 format %{ "enegq $dst, $src\t# long ndd" %}
11492 ins_encode %{
11493 __ enegq($dst$$Register, $src$$Register, false);
11494 %}
11495 ins_pipe(ialu_reg);
11496 %}
11497
11498 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11499 %{
11500 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11501 effect(KILL cr);
11502 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11503
11504 format %{ "negq $dst\t# long" %}
11505 ins_encode %{
11506 __ negq($dst$$Address);
11507 %}
11508 ins_pipe(ialu_reg);
11509 %}
11510
11511 //----------Multiplication/Division Instructions-------------------------------
11512 // Integer Multiplication Instructions
11513 // Multiply Register
11514
11515 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11516 %{
11517 predicate(!UseAPX);
11518 match(Set dst (MulI dst src));
11519 effect(KILL cr);
11520
11521 ins_cost(300);
11522 format %{ "imull $dst, $src\t# int" %}
11523 ins_encode %{
11524 __ imull($dst$$Register, $src$$Register);
11525 %}
11526 ins_pipe(ialu_reg_reg_alu0);
11527 %}
11528
11529 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11530 %{
11531 predicate(UseAPX);
11532 match(Set dst (MulI src1 src2));
11533 effect(KILL cr);
11534 flag(PD::Flag_ndd_demotable_commutative);
11535
11536 ins_cost(300);
11537 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11538 ins_encode %{
11539 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11540 %}
11541 ins_pipe(ialu_reg_reg_alu0);
11542 %}
11543
11544 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11545 %{
11546 match(Set dst (MulI src imm));
11547 effect(KILL cr);
11548
11549 ins_cost(300);
11550 format %{ "imull $dst, $src, $imm\t# int" %}
11551 ins_encode %{
11552 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11553 %}
11554 ins_pipe(ialu_reg_reg_alu0);
11555 %}
11556
11557 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11558 %{
11559 predicate(!UseAPX);
11560 match(Set dst (MulI dst (LoadI src)));
11561 effect(KILL cr);
11562
11563 ins_cost(350);
11564 format %{ "imull $dst, $src\t# int" %}
11565 ins_encode %{
11566 __ imull($dst$$Register, $src$$Address);
11567 %}
11568 ins_pipe(ialu_reg_mem_alu0);
11569 %}
11570
11571 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11572 %{
11573 predicate(UseAPX);
11574 match(Set dst (MulI src1 (LoadI src2)));
11575 effect(KILL cr);
11576 flag(PD::Flag_ndd_demotable);
11577
11578 ins_cost(350);
11579 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11580 ins_encode %{
11581 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11582 %}
11583 ins_pipe(ialu_reg_mem_alu0);
11584 %}
11585
11586 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11587 %{
11588 match(Set dst (MulI (LoadI src) imm));
11589 effect(KILL cr);
11590
11591 ins_cost(300);
11592 format %{ "imull $dst, $src, $imm\t# int" %}
11593 ins_encode %{
11594 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11595 %}
11596 ins_pipe(ialu_reg_mem_alu0);
11597 %}
11598
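// MulAddS2I (dst*src1 + src2*src3) has no single x86 encoding; the expand
// rule below rewrites it into two imull instructions plus an addl, reusing
// the rules defined above.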
11599 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11600 %{
11601 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11602 effect(KILL cr, KILL src2);
11603
11604 expand %{ mulI_rReg(dst, src1, cr);
11605 mulI_rReg(src2, src3, cr);
11606 addI_rReg(dst, src2, cr); %}
11607 %}
11608
11609 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11610 %{
11611 predicate(!UseAPX);
11612 match(Set dst (MulL dst src));
11613 effect(KILL cr);
11614
11615 ins_cost(300);
11616 format %{ "imulq $dst, $src\t# long" %}
11617 ins_encode %{
11618 __ imulq($dst$$Register, $src$$Register);
11619 %}
11620 ins_pipe(ialu_reg_reg_alu0);
11621 %}
11622
11623 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11624 %{
11625 predicate(UseAPX);
11626 match(Set dst (MulL src1 src2));
11627 effect(KILL cr);
11628 flag(PD::Flag_ndd_demotable_commutative);
11629
11630 ins_cost(300);
11631 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11632 ins_encode %{
11633 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11634 %}
11635 ins_pipe(ialu_reg_reg_alu0);
11636 %}
11637
11638 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11639 %{
11640 match(Set dst (MulL src imm));
11641 effect(KILL cr);
11642
11643 ins_cost(300);
11644 format %{ "imulq $dst, $src, $imm\t# long" %}
11645 ins_encode %{
11646 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11647 %}
11648 ins_pipe(ialu_reg_reg_alu0);
11649 %}
11650
11651 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11652 %{
11653 predicate(!UseAPX);
11654 match(Set dst (MulL dst (LoadL src)));
11655 effect(KILL cr);
11656
11657 ins_cost(350);
11658 format %{ "imulq $dst, $src\t# long" %}
11659 ins_encode %{
11660 __ imulq($dst$$Register, $src$$Address);
11661 %}
11662 ins_pipe(ialu_reg_mem_alu0);
11663 %}
11664
11665 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11666 %{
11667 predicate(UseAPX);
11668 match(Set dst (MulL src1 (LoadL src2)));
11669 effect(KILL cr);
11670 flag(PD::Flag_ndd_demotable_commutative);
11671
11672 ins_cost(350);
11673 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11674 ins_encode %{
11675 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11676 %}
11677 ins_pipe(ialu_reg_mem_alu0);
11678 %}
11679
11680 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11681 %{
11682 match(Set dst (MulL (LoadL src) imm));
11683 effect(KILL cr);
11684
11685 ins_cost(300);
11686 format %{ "imulq $dst, $src, $imm\t# long" %}
11687 ins_encode %{
11688 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11689 %}
11690 ins_pipe(ialu_reg_mem_alu0);
11691 %}
11692
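// The one-operand imulq/mulq forms produce the full 128-bit product in
// RDX:RAX, so the high-half rules pin one input to rax (USE_KILL) and define
// the result in rdx.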
11693 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11694 %{
11695 match(Set dst (MulHiL src rax));
11696 effect(USE_KILL rax, KILL cr);
11697
11698 ins_cost(300);
11699 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11700 ins_encode %{
11701 __ imulq($src$$Register);
11702 %}
11703 ins_pipe(ialu_reg_reg_alu0);
11704 %}
11705
11706 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11707 %{
11708 match(Set dst (UMulHiL src rax));
11709 effect(USE_KILL rax, KILL cr);
11710
11711 ins_cost(300);
11712 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11713 ins_encode %{
11714 __ mulq($src$$Register);
11715 %}
11716 ins_pipe(ialu_reg_reg_alu0);
11717 %}
11718
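// idiv raises #DE when min_jint (or min_jlong) is divided by -1, whereas
// Java defines MIN_VALUE / -1 == MIN_VALUE with remainder 0. The
// cdql_enc/cdqq_enc sequences therefore test for that single case and skip
// the divide, as the formats below show.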
11719 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11720 rFlagsReg cr)
11721 %{
11722 match(Set rax (DivI rax div));
11723 effect(KILL rdx, KILL cr);
11724
11725 ins_cost(30*100+10*100); // XXX
11726 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11727 "jne,s normal\n\t"
11728 "xorl rdx, rdx\n\t"
11729 "cmpl $div, -1\n\t"
11730 "je,s done\n"
11731 "normal: cdql\n\t"
11732 "idivl $div\n"
11733 "done:" %}
11734 ins_encode(cdql_enc(div));
11735 ins_pipe(ialu_reg_reg_alu0);
11736 %}
11737
11738 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11739 rFlagsReg cr)
11740 %{
11741 match(Set rax (DivL rax div));
11742 effect(KILL rdx, KILL cr);
11743
11744 ins_cost(30*100+10*100); // XXX
11745 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11746 "cmpq rax, rdx\n\t"
11747 "jne,s normal\n\t"
11748 "xorl rdx, rdx\n\t"
11749 "cmpq $div, -1\n\t"
11750 "je,s done\n"
11751 "normal: cdqq\n\t"
11752 "idivq $div\n"
11753 "done:" %}
11754 ins_encode(cdqq_enc(div));
11755 ins_pipe(ialu_reg_reg_alu0);
11756 %}
11757
11758 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11759 %{
11760 match(Set rax (UDivI rax div));
11761 effect(KILL rdx, KILL cr);
11762
11763 ins_cost(300);
11764 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11765 ins_encode %{
11766 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11767 %}
11768 ins_pipe(ialu_reg_reg_alu0);
11769 %}
11770
11771 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11772 %{
11773 match(Set rax (UDivL rax div));
11774 effect(KILL rdx, KILL cr);
11775
11776 ins_cost(300);
11777 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11778 ins_encode %{
11779 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11780 %}
11781 ins_pipe(ialu_reg_reg_alu0);
11782 %}
11783
11784 // Integer DIVMOD with Register, both quotient and mod results
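// DivModI/DivModL are multi-output nodes, so the match has no Set: one idiv
// defines both the quotient (projected from rax) and the remainder
// (projected from rdx).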
11785 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11786 rFlagsReg cr)
11787 %{
11788 match(DivModI rax div);
11789 effect(KILL cr);
11790
11791 ins_cost(30*100+10*100); // XXX
11792 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11793 "jne,s normal\n\t"
11794 "xorl rdx, rdx\n\t"
11795 "cmpl $div, -1\n\t"
11796 "je,s done\n"
11797 "normal: cdql\n\t"
11798 "idivl $div\n"
11799 "done:" %}
11800 ins_encode(cdql_enc(div));
11801 ins_pipe(pipe_slow);
11802 %}
11803
11804 // Long DIVMOD with Register, both quotient and mod results
11805 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11806 rFlagsReg cr)
11807 %{
11808 match(DivModL rax div);
11809 effect(KILL cr);
11810
11811 ins_cost(30*100+10*100); // XXX
11812 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11813 "cmpq rax, rdx\n\t"
11814 "jne,s normal\n\t"
11815 "xorl rdx, rdx\n\t"
11816 "cmpq $div, -1\n\t"
11817 "je,s done\n"
11818 "normal: cdqq\n\t"
11819 "idivq $div\n"
11820 "done:" %}
11821 ins_encode(cdqq_enc(div));
11822 ins_pipe(pipe_slow);
11823 %}
11824
11825 // Unsigned integer DIVMOD with Register, both quotient and mod results
11826 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11827 no_rax_rdx_RegI div, rFlagsReg cr)
11828 %{
11829 match(UDivModI rax div);
11830 effect(TEMP tmp, KILL cr);
11831
11832 ins_cost(300);
11833 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11834 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11835 %}
11836 ins_encode %{
11837 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11838 %}
11839 ins_pipe(pipe_slow);
11840 %}
11841
11842 // Unsigned long DIVMOD with Register, both quotient and mod results
11843 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11844 no_rax_rdx_RegL div, rFlagsReg cr)
11845 %{
11846 match(UDivModL rax div);
11847 effect(TEMP tmp, KILL cr);
11848
11849 ins_cost(300);
11850 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11851 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11852 %}
11853 ins_encode %{
11854 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11855 %}
11856 ins_pipe(pipe_slow);
11857 %}
11858
11859 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11860 rFlagsReg cr)
11861 %{
11862 match(Set rdx (ModI rax div));
11863 effect(KILL rax, KILL cr);
11864
11865 ins_cost(300); // XXX
11866 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11867 "jne,s normal\n\t"
11868 "xorl rdx, rdx\n\t"
11869 "cmpl $div, -1\n\t"
11870 "je,s done\n"
11871 "normal: cdql\n\t"
11872 "idivl $div\n"
11873 "done:" %}
11874 ins_encode(cdql_enc(div));
11875 ins_pipe(ialu_reg_reg_alu0);
11876 %}
11877
11878 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11879 rFlagsReg cr)
11880 %{
11881 match(Set rdx (ModL rax div));
11882 effect(KILL rax, KILL cr);
11883
11884 ins_cost(300); // XXX
11885 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11886 "cmpq rax, rdx\n\t"
11887 "jne,s normal\n\t"
11888 "xorl rdx, rdx\n\t"
11889 "cmpq $div, -1\n\t"
11890 "je,s done\n"
11891 "normal: cdqq\n\t"
11892 "idivq $div\n"
11893 "done:" %}
11894 ins_encode(cdqq_enc(div));
11895 ins_pipe(ialu_reg_reg_alu0);
11896 %}
11897
11898 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11899 %{
11900 match(Set rdx (UModI rax div));
11901 effect(KILL rax, KILL cr);
11902
11903 ins_cost(300);
11904 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11905 ins_encode %{
11906 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11907 %}
11908 ins_pipe(ialu_reg_reg_alu0);
11909 %}
11910
11911 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11912 %{
11913 match(Set rdx (UModL rax div));
11914 effect(KILL rax, KILL cr);
11915
11916 ins_cost(300);
11917 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11918 ins_encode %{
11919 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11920 %}
11921 ins_pipe(ialu_reg_reg_alu0);
11922 %}
11923
11924 // Integer Shift Instructions
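// Legacy variable shifts take the count in CL and clobber the flags; the
// BMI2 shlxl/sarxl forms accept the count in any register and leave the
// flags untouched, so those rules need neither rcx_RegI nor a KILL cr
// effect.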
11925 // Shift Left by one, two, three
11926 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11927 %{
11928 predicate(!UseAPX);
11929 match(Set dst (LShiftI dst shift));
11930 effect(KILL cr);
11931
11932 format %{ "sall $dst, $shift" %}
11933 ins_encode %{
11934 __ sall($dst$$Register, $shift$$constant);
11935 %}
11936 ins_pipe(ialu_reg);
11937 %}
11938
11939 // Shift Left by one, two, three
11940 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11941 %{
11942 predicate(UseAPX);
11943 match(Set dst (LShiftI src shift));
11944 effect(KILL cr);
11945 flag(PD::Flag_ndd_demotable);
11946
11947 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11948 ins_encode %{
11949 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11950 %}
11951 ins_pipe(ialu_reg);
11952 %}
11953
11954 // Shift Left by 8-bit immediate
11955 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11956 %{
11957 predicate(!UseAPX);
11958 match(Set dst (LShiftI dst shift));
11959 effect(KILL cr);
11960
11961 format %{ "sall $dst, $shift" %}
11962 ins_encode %{
11963 __ sall($dst$$Register, $shift$$constant);
11964 %}
11965 ins_pipe(ialu_reg);
11966 %}
11967
11968 // Shift Left by 8-bit immediate
11969 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11970 %{
11971 predicate(UseAPX);
11972 match(Set dst (LShiftI src shift));
11973 effect(KILL cr);
11974 flag(PD::Flag_ndd_demotable);
11975
11976 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11977 ins_encode %{
11978 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11979 %}
11980 ins_pipe(ialu_reg);
11981 %}
11982
11983 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11984 %{
11985 predicate(UseAPX);
11986 match(Set dst (LShiftI (LoadI src) shift));
11987 effect(KILL cr);
11988
11989 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11990 ins_encode %{
11991 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11992 %}
11993 ins_pipe(ialu_reg);
11994 %}
11995
11996 // Shift Left by 8-bit immediate
11997 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11998 %{
11999 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12000 effect(KILL cr);
12001
12002 format %{ "sall $dst, $shift" %}
12003 ins_encode %{
12004 __ sall($dst$$Address, $shift$$constant);
12005 %}
12006 ins_pipe(ialu_mem_imm);
12007 %}
12008
12009 // Shift Left by variable
12010 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12011 %{
12012 predicate(!VM_Version::supports_bmi2());
12013 match(Set dst (LShiftI dst shift));
12014 effect(KILL cr);
12015
12016 format %{ "sall $dst, $shift" %}
12017 ins_encode %{
12018 __ sall($dst$$Register);
12019 %}
12020 ins_pipe(ialu_reg_reg);
12021 %}
12022
12023 // Shift Left by variable
12024 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12025 %{
12026 predicate(!VM_Version::supports_bmi2());
12027 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12028 effect(KILL cr);
12029
12030 format %{ "sall $dst, $shift" %}
12031 ins_encode %{
12032 __ sall($dst$$Address);
12033 %}
12034 ins_pipe(ialu_mem_reg);
12035 %}
12036
12037 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12038 %{
12039 predicate(VM_Version::supports_bmi2());
12040 match(Set dst (LShiftI src shift));
12041
12042 format %{ "shlxl $dst, $src, $shift" %}
12043 ins_encode %{
12044 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12045 %}
12046 ins_pipe(ialu_reg_reg);
12047 %}
12048
12049 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12050 %{
12051 predicate(VM_Version::supports_bmi2());
12052 match(Set dst (LShiftI (LoadI src) shift));
12053 ins_cost(175);
12054 format %{ "shlxl $dst, $src, $shift" %}
12055 ins_encode %{
12056 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12057 %}
12058 ins_pipe(ialu_reg_mem);
12059 %}
12060
12061 // Arithmetic Shift Right by 8-bit immediate
12062 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12063 %{
12064 predicate(!UseAPX);
12065 match(Set dst (RShiftI dst shift));
12066 effect(KILL cr);
12067
12068 format %{ "sarl $dst, $shift" %}
12069 ins_encode %{
12070 __ sarl($dst$$Register, $shift$$constant);
12071 %}
ins_pipe(ialu_reg);
12073 %}
12074
12075 // Arithmetic Shift Right by 8-bit immediate
12076 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12077 %{
12078 predicate(UseAPX);
12079 match(Set dst (RShiftI src shift));
12080 effect(KILL cr);
12081 flag(PD::Flag_ndd_demotable);
12082
12083 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12084 ins_encode %{
12085 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12086 %}
ins_pipe(ialu_reg);
12088 %}
12089
12090 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12091 %{
12092 predicate(UseAPX);
12093 match(Set dst (RShiftI (LoadI src) shift));
12094 effect(KILL cr);
12095
12096 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12097 ins_encode %{
12098 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12099 %}
ins_pipe(ialu_reg);
12101 %}
12102
12103 // Arithmetic Shift Right by 8-bit immediate
12104 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12105 %{
12106 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12107 effect(KILL cr);
12108
12109 format %{ "sarl $dst, $shift" %}
12110 ins_encode %{
12111 __ sarl($dst$$Address, $shift$$constant);
12112 %}
12113 ins_pipe(ialu_mem_imm);
12114 %}
12115
12116 // Arithmetic Shift Right by variable
12117 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12118 %{
12119 predicate(!VM_Version::supports_bmi2());
12120 match(Set dst (RShiftI dst shift));
12121 effect(KILL cr);
12122
12123 format %{ "sarl $dst, $shift" %}
12124 ins_encode %{
12125 __ sarl($dst$$Register);
12126 %}
12127 ins_pipe(ialu_reg_reg);
12128 %}
12129
12130 // Arithmetic Shift Right by variable
12131 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12132 %{
12133 predicate(!VM_Version::supports_bmi2());
12134 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12135 effect(KILL cr);
12136
12137 format %{ "sarl $dst, $shift" %}
12138 ins_encode %{
12139 __ sarl($dst$$Address);
12140 %}
12141 ins_pipe(ialu_mem_reg);
12142 %}
12143
12144 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12145 %{
12146 predicate(VM_Version::supports_bmi2());
12147 match(Set dst (RShiftI src shift));
12148
12149 format %{ "sarxl $dst, $src, $shift" %}
12150 ins_encode %{
12151 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12152 %}
12153 ins_pipe(ialu_reg_reg);
12154 %}
12155
12156 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12157 %{
12158 predicate(VM_Version::supports_bmi2());
12159 match(Set dst (RShiftI (LoadI src) shift));
12160 ins_cost(175);
12161 format %{ "sarxl $dst, $src, $shift" %}
12162 ins_encode %{
12163 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12164 %}
12165 ins_pipe(ialu_reg_mem);
12166 %}
12167
12168 // Logical Shift Right by 8-bit immediate
12169 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12170 %{
12171 predicate(!UseAPX);
12172 match(Set dst (URShiftI dst shift));
12173 effect(KILL cr);
12174
12175 format %{ "shrl $dst, $shift" %}
12176 ins_encode %{
12177 __ shrl($dst$$Register, $shift$$constant);
12178 %}
12179 ins_pipe(ialu_reg);
12180 %}
12181
12182 // Logical Shift Right by 8-bit immediate
12183 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12184 %{
12185 predicate(UseAPX);
12186 match(Set dst (URShiftI src shift));
12187 effect(KILL cr);
12188 flag(PD::Flag_ndd_demotable);
12189
format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12191 ins_encode %{
12192 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12193 %}
12194 ins_pipe(ialu_reg);
12195 %}
12196
12197 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12198 %{
12199 predicate(UseAPX);
12200 match(Set dst (URShiftI (LoadI src) shift));
12201 effect(KILL cr);
12202
format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12204 ins_encode %{
12205 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12206 %}
12207 ins_pipe(ialu_reg);
12208 %}
12209
12210 // Logical Shift Right by 8-bit immediate
12211 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12212 %{
12213 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12214 effect(KILL cr);
12215
12216 format %{ "shrl $dst, $shift" %}
12217 ins_encode %{
12218 __ shrl($dst$$Address, $shift$$constant);
12219 %}
12220 ins_pipe(ialu_mem_imm);
12221 %}
12222
12223 // Logical Shift Right by variable
12224 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12225 %{
12226 predicate(!VM_Version::supports_bmi2());
12227 match(Set dst (URShiftI dst shift));
12228 effect(KILL cr);
12229
12230 format %{ "shrl $dst, $shift" %}
12231 ins_encode %{
12232 __ shrl($dst$$Register);
12233 %}
12234 ins_pipe(ialu_reg_reg);
12235 %}
12236
12237 // Logical Shift Right by variable
12238 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12239 %{
12240 predicate(!VM_Version::supports_bmi2());
12241 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12242 effect(KILL cr);
12243
12244 format %{ "shrl $dst, $shift" %}
12245 ins_encode %{
12246 __ shrl($dst$$Address);
12247 %}
12248 ins_pipe(ialu_mem_reg);
12249 %}
12250
12251 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12252 %{
12253 predicate(VM_Version::supports_bmi2());
12254 match(Set dst (URShiftI src shift));
12255
12256 format %{ "shrxl $dst, $src, $shift" %}
12257 ins_encode %{
12258 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12259 %}
12260 ins_pipe(ialu_reg_reg);
12261 %}
12262
12263 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12264 %{
12265 predicate(VM_Version::supports_bmi2());
12266 match(Set dst (URShiftI (LoadI src) shift));
12267 ins_cost(175);
12268 format %{ "shrxl $dst, $src, $shift" %}
12269 ins_encode %{
12270 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12271 %}
12272 ins_pipe(ialu_reg_mem);
12273 %}
12274
12275 // Long Shift Instructions
12276 // Shift Left by one, two, three
12277 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12278 %{
12279 predicate(!UseAPX);
12280 match(Set dst (LShiftL dst shift));
12281 effect(KILL cr);
12282
12283 format %{ "salq $dst, $shift" %}
12284 ins_encode %{
12285 __ salq($dst$$Register, $shift$$constant);
12286 %}
12287 ins_pipe(ialu_reg);
12288 %}
12289
12290 // Shift Left by one, two, three
12291 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12292 %{
12293 predicate(UseAPX);
12294 match(Set dst (LShiftL src shift));
12295 effect(KILL cr);
12296 flag(PD::Flag_ndd_demotable);
12297
12298 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12299 ins_encode %{
12300 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12301 %}
12302 ins_pipe(ialu_reg);
12303 %}
12304
12305 // Shift Left by 8-bit immediate
12306 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12307 %{
12308 predicate(!UseAPX);
12309 match(Set dst (LShiftL dst shift));
12310 effect(KILL cr);
12311
12312 format %{ "salq $dst, $shift" %}
12313 ins_encode %{
12314 __ salq($dst$$Register, $shift$$constant);
12315 %}
12316 ins_pipe(ialu_reg);
12317 %}
12318
12319 // Shift Left by 8-bit immediate
12320 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12321 %{
12322 predicate(UseAPX);
12323 match(Set dst (LShiftL src shift));
12324 effect(KILL cr);
12325 flag(PD::Flag_ndd_demotable);
12326
12327 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12328 ins_encode %{
12329 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12330 %}
12331 ins_pipe(ialu_reg);
12332 %}
12333
12334 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12335 %{
12336 predicate(UseAPX);
12337 match(Set dst (LShiftL (LoadL src) shift));
12338 effect(KILL cr);
12339
12340 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12341 ins_encode %{
12342 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12343 %}
12344 ins_pipe(ialu_reg);
12345 %}
12346
12347 // Shift Left by 8-bit immediate
12348 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12349 %{
12350 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12351 effect(KILL cr);
12352
12353 format %{ "salq $dst, $shift" %}
12354 ins_encode %{
12355 __ salq($dst$$Address, $shift$$constant);
12356 %}
12357 ins_pipe(ialu_mem_imm);
12358 %}
12359
12360 // Shift Left by variable
12361 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12362 %{
12363 predicate(!VM_Version::supports_bmi2());
12364 match(Set dst (LShiftL dst shift));
12365 effect(KILL cr);
12366
12367 format %{ "salq $dst, $shift" %}
12368 ins_encode %{
12369 __ salq($dst$$Register);
12370 %}
12371 ins_pipe(ialu_reg_reg);
12372 %}
12373
12374 // Shift Left by variable
12375 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12376 %{
12377 predicate(!VM_Version::supports_bmi2());
12378 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12379 effect(KILL cr);
12380
12381 format %{ "salq $dst, $shift" %}
12382 ins_encode %{
12383 __ salq($dst$$Address);
12384 %}
12385 ins_pipe(ialu_mem_reg);
12386 %}
12387
12388 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12389 %{
12390 predicate(VM_Version::supports_bmi2());
12391 match(Set dst (LShiftL src shift));
12392
12393 format %{ "shlxq $dst, $src, $shift" %}
12394 ins_encode %{
12395 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12396 %}
12397 ins_pipe(ialu_reg_reg);
12398 %}
12399
12400 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12401 %{
12402 predicate(VM_Version::supports_bmi2());
12403 match(Set dst (LShiftL (LoadL src) shift));
12404 ins_cost(175);
12405 format %{ "shlxq $dst, $src, $shift" %}
12406 ins_encode %{
12407 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12408 %}
12409 ins_pipe(ialu_reg_mem);
12410 %}
12411
// Arithmetic Shift Right by immediate (the shift count is masked to 6 bits)
12413 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12414 %{
12415 predicate(!UseAPX);
12416 match(Set dst (RShiftL dst shift));
12417 effect(KILL cr);
12418
12419 format %{ "sarq $dst, $shift" %}
12420 ins_encode %{
12421 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12422 %}
ins_pipe(ialu_reg);
12424 %}
12425
// Arithmetic Shift Right by immediate (the shift count is masked to 6 bits)
12427 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12428 %{
12429 predicate(UseAPX);
12430 match(Set dst (RShiftL src shift));
12431 effect(KILL cr);
12432 flag(PD::Flag_ndd_demotable);
12433
12434 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12435 ins_encode %{
12436 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12437 %}
ins_pipe(ialu_reg);
12439 %}
12440
12441 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12442 %{
12443 predicate(UseAPX);
12444 match(Set dst (RShiftL (LoadL src) shift));
12445 effect(KILL cr);
12446
12447 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12448 ins_encode %{
12449 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12450 %}
ins_pipe(ialu_reg);
12452 %}
12453
// Arithmetic Shift Right by immediate (the shift count is masked to 6 bits)
12455 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12456 %{
12457 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12458 effect(KILL cr);
12459
12460 format %{ "sarq $dst, $shift" %}
12461 ins_encode %{
12462 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12463 %}
12464 ins_pipe(ialu_mem_imm);
12465 %}
12466
12467 // Arithmetic Shift Right by variable
12468 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12469 %{
12470 predicate(!VM_Version::supports_bmi2());
12471 match(Set dst (RShiftL dst shift));
12472 effect(KILL cr);
12473
12474 format %{ "sarq $dst, $shift" %}
12475 ins_encode %{
12476 __ sarq($dst$$Register);
12477 %}
12478 ins_pipe(ialu_reg_reg);
12479 %}
12480
12481 // Arithmetic Shift Right by variable
12482 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12483 %{
12484 predicate(!VM_Version::supports_bmi2());
12485 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12486 effect(KILL cr);
12487
12488 format %{ "sarq $dst, $shift" %}
12489 ins_encode %{
12490 __ sarq($dst$$Address);
12491 %}
12492 ins_pipe(ialu_mem_reg);
12493 %}
12494
12495 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12496 %{
12497 predicate(VM_Version::supports_bmi2());
12498 match(Set dst (RShiftL src shift));
12499
12500 format %{ "sarxq $dst, $src, $shift" %}
12501 ins_encode %{
12502 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12503 %}
12504 ins_pipe(ialu_reg_reg);
12505 %}
12506
12507 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12508 %{
12509 predicate(VM_Version::supports_bmi2());
12510 match(Set dst (RShiftL (LoadL src) shift));
12511 ins_cost(175);
12512 format %{ "sarxq $dst, $src, $shift" %}
12513 ins_encode %{
12514 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12515 %}
12516 ins_pipe(ialu_reg_mem);
12517 %}
12518
12519 // Logical Shift Right by 8-bit immediate
12520 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12521 %{
12522 predicate(!UseAPX);
12523 match(Set dst (URShiftL dst shift));
12524 effect(KILL cr);
12525
12526 format %{ "shrq $dst, $shift" %}
12527 ins_encode %{
12528 __ shrq($dst$$Register, $shift$$constant);
12529 %}
12530 ins_pipe(ialu_reg);
12531 %}
12532
12533 // Logical Shift Right by 8-bit immediate
12534 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12535 %{
12536 predicate(UseAPX);
12537 match(Set dst (URShiftL src shift));
12538 effect(KILL cr);
12539 flag(PD::Flag_ndd_demotable);
12540
12541 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12542 ins_encode %{
12543 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12544 %}
12545 ins_pipe(ialu_reg);
12546 %}
12547
12548 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12549 %{
12550 predicate(UseAPX);
12551 match(Set dst (URShiftL (LoadL src) shift));
12552 effect(KILL cr);
12553
12554 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12555 ins_encode %{
12556 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12557 %}
12558 ins_pipe(ialu_reg);
12559 %}
12560
12561 // Logical Shift Right by 8-bit immediate
12562 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12563 %{
12564 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12565 effect(KILL cr);
12566
12567 format %{ "shrq $dst, $shift" %}
12568 ins_encode %{
12569 __ shrq($dst$$Address, $shift$$constant);
12570 %}
12571 ins_pipe(ialu_mem_imm);
12572 %}
12573
12574 // Logical Shift Right by variable
12575 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12576 %{
12577 predicate(!VM_Version::supports_bmi2());
12578 match(Set dst (URShiftL dst shift));
12579 effect(KILL cr);
12580
12581 format %{ "shrq $dst, $shift" %}
12582 ins_encode %{
12583 __ shrq($dst$$Register);
12584 %}
12585 ins_pipe(ialu_reg_reg);
12586 %}
12587
12588 // Logical Shift Right by variable
12589 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12590 %{
12591 predicate(!VM_Version::supports_bmi2());
12592 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12593 effect(KILL cr);
12594
12595 format %{ "shrq $dst, $shift" %}
12596 ins_encode %{
12597 __ shrq($dst$$Address);
12598 %}
12599 ins_pipe(ialu_mem_reg);
12600 %}
12601
12602 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12603 %{
12604 predicate(VM_Version::supports_bmi2());
12605 match(Set dst (URShiftL src shift));
12606
12607 format %{ "shrxq $dst, $src, $shift" %}
12608 ins_encode %{
12609 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12610 %}
12611 ins_pipe(ialu_reg_reg);
12612 %}
12613
12614 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12615 %{
12616 predicate(VM_Version::supports_bmi2());
12617 match(Set dst (URShiftL (LoadL src) shift));
12618 ins_cost(175);
12619 format %{ "shrxq $dst, $src, $shift" %}
12620 ins_encode %{
12621 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12622 %}
12623 ins_pipe(ialu_reg_mem);
12624 %}
12625
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
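// For illustration: the parser expands the i2b bytecode into the shift pair
// (x << 24) >> 24, which sign-extends the low byte of x; this rule collapses
// that pair back into a single movsbl.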
12628 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12629 %{
12630 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12631
12632 format %{ "movsbl $dst, $src\t# i2b" %}
12633 ins_encode %{
12634 __ movsbl($dst$$Register, $src$$Register);
12635 %}
12636 ins_pipe(ialu_reg_reg);
12637 %}
12638
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12641 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12642 %{
12643 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12644
12645 format %{ "movswl $dst, $src\t# i2s" %}
12646 ins_encode %{
12647 __ movswl($dst$$Register, $src$$Register);
12648 %}
12649 ins_pipe(ialu_reg_reg);
12650 %}
12651
12652 // ROL/ROR instructions
12653
// Rotate Left by constant.
12655 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12656 %{
12657 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12658 match(Set dst (RotateLeft dst shift));
12659 effect(KILL cr);
12660 format %{ "roll $dst, $shift" %}
12661 ins_encode %{
12662 __ roll($dst$$Register, $shift$$constant);
12663 %}
12664 ins_pipe(ialu_reg);
12665 %}
12666
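// Rotate Left by constant.
// With BMI2 this is realized via rorx, using the identity
// rol(x, s) == ror(x, 32 - s) with counts taken mod 32; for example,
// rol(x, 1) == ror(x, 31). The long forms below use 64 - s analogously.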
12667 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12668 %{
12669 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12670 match(Set dst (RotateLeft src shift));
12671 format %{ "rolxl $dst, $src, $shift" %}
12672 ins_encode %{
12673 int shift = 32 - ($shift$$constant & 31);
12674 __ rorxl($dst$$Register, $src$$Register, shift);
12675 %}
12676 ins_pipe(ialu_reg_reg);
12677 %}
12678
12679 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12680 %{
12681 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12682 match(Set dst (RotateLeft (LoadI src) shift));
12683 ins_cost(175);
12684 format %{ "rolxl $dst, $src, $shift" %}
12685 ins_encode %{
12686 int shift = 32 - ($shift$$constant & 31);
12687 __ rorxl($dst$$Register, $src$$Address, shift);
12688 %}
12689 ins_pipe(ialu_reg_mem);
12690 %}
12691
12692 // Rotate Left by variable
12693 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12694 %{
12695 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12696 match(Set dst (RotateLeft dst shift));
12697 effect(KILL cr);
12698 format %{ "roll $dst, $shift" %}
12699 ins_encode %{
12700 __ roll($dst$$Register);
12701 %}
12702 ins_pipe(ialu_reg_reg);
12703 %}
12704
12705 // Rotate Left by variable
12706 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12707 %{
12708 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12709 match(Set dst (RotateLeft src shift));
12710 effect(KILL cr);
12711 flag(PD::Flag_ndd_demotable);
12712
12713 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12714 ins_encode %{
12715 __ eroll($dst$$Register, $src$$Register, false);
12716 %}
12717 ins_pipe(ialu_reg_reg);
12718 %}
12719
12720 // Rotate Right by constant.
12721 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12722 %{
12723 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12724 match(Set dst (RotateRight dst shift));
12725 effect(KILL cr);
12726 format %{ "rorl $dst, $shift" %}
12727 ins_encode %{
12728 __ rorl($dst$$Register, $shift$$constant);
12729 %}
12730 ins_pipe(ialu_reg);
12731 %}
12732
12733 // Rotate Right by constant.
12734 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12735 %{
12736 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12737 match(Set dst (RotateRight src shift));
12738 format %{ "rorxl $dst, $src, $shift" %}
12739 ins_encode %{
12740 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12741 %}
12742 ins_pipe(ialu_reg_reg);
12743 %}
12744
12745 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12746 %{
12747 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12748 match(Set dst (RotateRight (LoadI src) shift));
12749 ins_cost(175);
12750 format %{ "rorxl $dst, $src, $shift" %}
12751 ins_encode %{
12752 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12753 %}
12754 ins_pipe(ialu_reg_mem);
12755 %}
12756
12757 // Rotate Right by variable
12758 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12759 %{
12760 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12761 match(Set dst (RotateRight dst shift));
12762 effect(KILL cr);
12763 format %{ "rorl $dst, $shift" %}
12764 ins_encode %{
12765 __ rorl($dst$$Register);
12766 %}
12767 ins_pipe(ialu_reg_reg);
12768 %}
12769
12770 // Rotate Right by variable
12771 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12772 %{
12773 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12774 match(Set dst (RotateRight src shift));
12775 effect(KILL cr);
12776 flag(PD::Flag_ndd_demotable);
12777
format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12779 ins_encode %{
12780 __ erorl($dst$$Register, $src$$Register, false);
12781 %}
12782 ins_pipe(ialu_reg_reg);
12783 %}
12784
12785 // Rotate Left by constant.
12786 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12787 %{
12788 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12789 match(Set dst (RotateLeft dst shift));
12790 effect(KILL cr);
12791 format %{ "rolq $dst, $shift" %}
12792 ins_encode %{
12793 __ rolq($dst$$Register, $shift$$constant);
12794 %}
12795 ins_pipe(ialu_reg);
12796 %}
12797
12798 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12799 %{
12800 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12801 match(Set dst (RotateLeft src shift));
12802 format %{ "rolxq $dst, $src, $shift" %}
12803 ins_encode %{
12804 int shift = 64 - ($shift$$constant & 63);
12805 __ rorxq($dst$$Register, $src$$Register, shift);
12806 %}
12807 ins_pipe(ialu_reg_reg);
12808 %}
12809
12810 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12811 %{
12812 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12813 match(Set dst (RotateLeft (LoadL src) shift));
12814 ins_cost(175);
12815 format %{ "rolxq $dst, $src, $shift" %}
12816 ins_encode %{
12817 int shift = 64 - ($shift$$constant & 63);
12818 __ rorxq($dst$$Register, $src$$Address, shift);
12819 %}
12820 ins_pipe(ialu_reg_mem);
12821 %}
12822
12823 // Rotate Left by variable
12824 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12825 %{
12826 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12827 match(Set dst (RotateLeft dst shift));
12828 effect(KILL cr);
12829
12830 format %{ "rolq $dst, $shift" %}
12831 ins_encode %{
12832 __ rolq($dst$$Register);
12833 %}
12834 ins_pipe(ialu_reg_reg);
12835 %}
12836
12837 // Rotate Left by variable
12838 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12839 %{
12840 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12841 match(Set dst (RotateLeft src shift));
12842 effect(KILL cr);
12843 flag(PD::Flag_ndd_demotable);
12844
format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12846 ins_encode %{
12847 __ erolq($dst$$Register, $src$$Register, false);
12848 %}
12849 ins_pipe(ialu_reg_reg);
12850 %}
12851
12852 // Rotate Right by constant.
12853 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12854 %{
12855 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12856 match(Set dst (RotateRight dst shift));
12857 effect(KILL cr);
12858 format %{ "rorq $dst, $shift" %}
12859 ins_encode %{
12860 __ rorq($dst$$Register, $shift$$constant);
12861 %}
12862 ins_pipe(ialu_reg);
12863 %}
12864
12865 // Rotate Right by constant
12866 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12867 %{
12868 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12869 match(Set dst (RotateRight src shift));
12870 format %{ "rorxq $dst, $src, $shift" %}
12871 ins_encode %{
12872 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12873 %}
12874 ins_pipe(ialu_reg_reg);
12875 %}
12876
12877 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12878 %{
12879 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12880 match(Set dst (RotateRight (LoadL src) shift));
12881 ins_cost(175);
12882 format %{ "rorxq $dst, $src, $shift" %}
12883 ins_encode %{
12884 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12885 %}
12886 ins_pipe(ialu_reg_mem);
12887 %}
12888
12889 // Rotate Right by variable
12890 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12891 %{
12892 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12893 match(Set dst (RotateRight dst shift));
12894 effect(KILL cr);
12895 format %{ "rorq $dst, $shift" %}
12896 ins_encode %{
12897 __ rorq($dst$$Register);
12898 %}
12899 ins_pipe(ialu_reg_reg);
12900 %}
12901
12902 // Rotate Right by variable
12903 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12904 %{
12905 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12906 match(Set dst (RotateRight src shift));
12907 effect(KILL cr);
12908 flag(PD::Flag_ndd_demotable);
12909
format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12911 ins_encode %{
12912 __ erorq($dst$$Register, $src$$Register, false);
12913 %}
12914 ins_pipe(ialu_reg_reg);
12915 %}
12916
12917 //----------------------------- CompressBits/ExpandBits ------------------------
12918
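// A short worked example of the BMI2 semantics (values are illustrative):
//   pext(src = 0b101100, mask = 0b111000) == 0b101     (gather masked bits)
//   pdep(src = 0b000101, mask = 0b111000) == 0b101000  (scatter into mask)
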
12919 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12920 predicate(n->bottom_type()->isa_long());
12921 match(Set dst (CompressBits src mask));
12922 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12923 ins_encode %{
12924 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12925 %}
12926 ins_pipe( pipe_slow );
12927 %}
12928
12929 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12930 predicate(n->bottom_type()->isa_long());
12931 match(Set dst (ExpandBits src mask));
12932 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12933 ins_encode %{
12934 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12935 %}
12936 ins_pipe( pipe_slow );
12937 %}
12938
12939 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12940 predicate(n->bottom_type()->isa_long());
12941 match(Set dst (CompressBits src (LoadL mask)));
12942 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12943 ins_encode %{
12944 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12945 %}
12946 ins_pipe( pipe_slow );
12947 %}
12948
12949 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12950 predicate(n->bottom_type()->isa_long());
12951 match(Set dst (ExpandBits src (LoadL mask)));
12952 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12953 ins_encode %{
12954 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12955 %}
12956 ins_pipe( pipe_slow );
12957 %}
12958
12960 // Logical Instructions
12961
12962 // Integer Logical Instructions
12963
12964 // And Instructions
12965 // And Register with Register
12966 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12967 %{
12968 predicate(!UseAPX);
12969 match(Set dst (AndI dst src));
12970 effect(KILL cr);
12971 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12972
12973 format %{ "andl $dst, $src\t# int" %}
12974 ins_encode %{
12975 __ andl($dst$$Register, $src$$Register);
12976 %}
12977 ins_pipe(ialu_reg_reg);
12978 %}
12979
12980 // And Register with Register using New Data Destination (NDD)
12981 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12982 %{
12983 predicate(UseAPX);
12984 match(Set dst (AndI src1 src2));
12985 effect(KILL cr);
12986 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
12987
12988 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12989 ins_encode %{
12990 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12992 %}
12993 ins_pipe(ialu_reg_reg);
12994 %}
12995
12996 // And Register with Immediate 255
12997 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12998 %{
12999 match(Set dst (AndI src mask));
13000
13001 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
13002 ins_encode %{
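// movzbl zero-extends the low byte, which implements $src & 0xFF directly.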
13003 __ movzbl($dst$$Register, $src$$Register);
13004 %}
13005 ins_pipe(ialu_reg);
13006 %}
13007
13008 // And Register with Immediate 255 and promote to long
13009 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13010 %{
13011 match(Set dst (ConvI2L (AndI src mask)));
13012
13013 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
13014 ins_encode %{
13015 __ movzbl($dst$$Register, $src$$Register);
13016 %}
13017 ins_pipe(ialu_reg);
13018 %}
13019
13020 // And Register with Immediate 65535
13021 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13022 %{
13023 match(Set dst (AndI src mask));
13024
13025 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13026 ins_encode %{
13027 __ movzwl($dst$$Register, $src$$Register);
13028 %}
13029 ins_pipe(ialu_reg);
13030 %}
13031
13032 // And Register with Immediate 65535 and promote to long
13033 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13034 %{
13035 match(Set dst (ConvI2L (AndI src mask)));
13036
13037 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13038 ins_encode %{
13039 __ movzwl($dst$$Register, $src$$Register);
13040 %}
13041 ins_pipe(ialu_reg);
13042 %}
13043
// The int-to-long conversion can be skipped after an AND with a small (2^n - 1) bitmask
13045 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13046 %{
13047 predicate(VM_Version::supports_bmi2());
13048 ins_cost(125);
13049 effect(TEMP tmp, KILL cr);
13050 match(Set dst (ConvI2L (AndI src mask)));
format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13052 ins_encode %{
13053 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13054 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13055 %}
13056 ins_pipe(ialu_reg_reg);
13057 %}
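
// For illustration (hypothetical values): with mask == 0x7FFF == 2^15 - 1,
// $tmp is loaded with 15 and bzhiq copies $src with bit positions 15..63
// cleared, producing the zero-extended long result in one instruction.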
13058
13059 // And Register with Immediate
13060 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13061 %{
13062 predicate(!UseAPX);
13063 match(Set dst (AndI dst src));
13064 effect(KILL cr);
13065 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13066
13067 format %{ "andl $dst, $src\t# int" %}
13068 ins_encode %{
13069 __ andl($dst$$Register, $src$$constant);
13070 %}
13071 ins_pipe(ialu_reg);
13072 %}
13073
13074 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13075 %{
13076 predicate(UseAPX);
13077 match(Set dst (AndI src1 src2));
13078 effect(KILL cr);
13079 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13080
13081 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13082 ins_encode %{
13083 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13084 %}
13085 ins_pipe(ialu_reg);
13086 %}
13087
13088 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13089 %{
13090 predicate(UseAPX);
13091 match(Set dst (AndI (LoadI src1) src2));
13092 effect(KILL cr);
13093 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13094
13095 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13096 ins_encode %{
13097 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13098 %}
13099 ins_pipe(ialu_reg);
13100 %}
13101
13102 // And Register with Memory
13103 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13104 %{
13105 predicate(!UseAPX);
13106 match(Set dst (AndI dst (LoadI src)));
13107 effect(KILL cr);
13108 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13109
13110 ins_cost(150);
13111 format %{ "andl $dst, $src\t# int" %}
13112 ins_encode %{
13113 __ andl($dst$$Register, $src$$Address);
13114 %}
13115 ins_pipe(ialu_reg_mem);
13116 %}
13117
13118 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13119 %{
13120 predicate(UseAPX);
13121 match(Set dst (AndI src1 (LoadI src2)));
13122 effect(KILL cr);
13123 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13124
13125 ins_cost(150);
13126 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13127 ins_encode %{
13128 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13129 %}
13130 ins_pipe(ialu_reg_mem);
13131 %}
13132
13133 // And Memory with Register
13134 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13135 %{
13136 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13137 effect(KILL cr);
13138 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13139
13140 ins_cost(150);
13141 format %{ "andb $dst, $src\t# byte" %}
13142 ins_encode %{
13143 __ andb($dst$$Address, $src$$Register);
13144 %}
13145 ins_pipe(ialu_mem_reg);
13146 %}
13147
13148 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13149 %{
13150 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13151 effect(KILL cr);
13152 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13153
13154 ins_cost(150);
13155 format %{ "andl $dst, $src\t# int" %}
13156 ins_encode %{
13157 __ andl($dst$$Address, $src$$Register);
13158 %}
13159 ins_pipe(ialu_mem_reg);
13160 %}
13161
13162 // And Memory with Immediate
13163 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13164 %{
13165 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13166 effect(KILL cr);
13167 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13168
13169 ins_cost(125);
13170 format %{ "andl $dst, $src\t# int" %}
13171 ins_encode %{
13172 __ andl($dst$$Address, $src$$constant);
13173 %}
13174 ins_pipe(ialu_mem_imm);
13175 %}
13176
13177 // BMI1 instructions
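// These rules match the scalar idioms that the BMI1 instructions implement
// directly. A sketch of the identities involved:
//   andn(a, b) == ~a & b
//   blsi(x)    == x & -x        (isolate lowest set bit)
//   blsmsk(x)  == x ^ (x - 1)   (mask up to and including lowest set bit)
//   blsr(x)    == x & (x - 1)   (clear lowest set bit)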
13178 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13179 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13180 predicate(UseBMI1Instructions);
13181 effect(KILL cr);
13182 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13183
13184 ins_cost(125);
13185 format %{ "andnl $dst, $src1, $src2" %}
13186
13187 ins_encode %{
13188 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13189 %}
13190 ins_pipe(ialu_reg_mem);
13191 %}
13192
13193 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13194 match(Set dst (AndI (XorI src1 minus_1) src2));
13195 predicate(UseBMI1Instructions);
13196 effect(KILL cr);
13197 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13198
13199 format %{ "andnl $dst, $src1, $src2" %}
13200
13201 ins_encode %{
13202 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13203 %}
13204 ins_pipe(ialu_reg);
13205 %}
13206
13207 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13208 match(Set dst (AndI (SubI imm_zero src) src));
13209 predicate(UseBMI1Instructions);
13210 effect(KILL cr);
13211 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13212
13213 format %{ "blsil $dst, $src" %}
13214
13215 ins_encode %{
13216 __ blsil($dst$$Register, $src$$Register);
13217 %}
13218 ins_pipe(ialu_reg);
13219 %}
13220
13221 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13222 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13223 predicate(UseBMI1Instructions);
13224 effect(KILL cr);
13225 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13226
13227 ins_cost(125);
13228 format %{ "blsil $dst, $src" %}
13229
13230 ins_encode %{
13231 __ blsil($dst$$Register, $src$$Address);
13232 %}
13233 ins_pipe(ialu_reg_mem);
13234 %}
13235
13236 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13237 %{
13238 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13239 predicate(UseBMI1Instructions);
13240 effect(KILL cr);
13241 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13242
13243 ins_cost(125);
13244 format %{ "blsmskl $dst, $src" %}
13245
13246 ins_encode %{
13247 __ blsmskl($dst$$Register, $src$$Address);
13248 %}
13249 ins_pipe(ialu_reg_mem);
13250 %}
13251
13252 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13253 %{
13254 match(Set dst (XorI (AddI src minus_1) src));
13255 predicate(UseBMI1Instructions);
13256 effect(KILL cr);
13257 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13258
13259 format %{ "blsmskl $dst, $src" %}
13260
13261 ins_encode %{
13262 __ blsmskl($dst$$Register, $src$$Register);
13263 %}
13264
13265 ins_pipe(ialu_reg);
13266 %}
13267
13268 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13269 %{
13270 match(Set dst (AndI (AddI src minus_1) src) );
13271 predicate(UseBMI1Instructions);
13272 effect(KILL cr);
13273 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13274
13275 format %{ "blsrl $dst, $src" %}
13276
13277 ins_encode %{
13278 __ blsrl($dst$$Register, $src$$Register);
13279 %}
13280
ins_pipe(ialu_reg);
13282 %}
13283
13284 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13285 %{
13286 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13287 predicate(UseBMI1Instructions);
13288 effect(KILL cr);
13289 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13290
13291 ins_cost(125);
13292 format %{ "blsrl $dst, $src" %}
13293
13294 ins_encode %{
13295 __ blsrl($dst$$Register, $src$$Address);
13296 %}
13297
ins_pipe(ialu_reg_mem);
13299 %}
13300
13301 // Or Instructions
13302 // Or Register with Register
13303 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13304 %{
13305 predicate(!UseAPX);
13306 match(Set dst (OrI dst src));
13307 effect(KILL cr);
13308 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13309
13310 format %{ "orl $dst, $src\t# int" %}
13311 ins_encode %{
13312 __ orl($dst$$Register, $src$$Register);
13313 %}
13314 ins_pipe(ialu_reg_reg);
13315 %}
13316
13317 // Or Register with Register using New Data Destination (NDD)
13318 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13319 %{
13320 predicate(UseAPX);
13321 match(Set dst (OrI src1 src2));
13322 effect(KILL cr);
13323 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13324
13325 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13326 ins_encode %{
13327 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13328 %}
13329 ins_pipe(ialu_reg_reg);
13330 %}
13331
13332 // Or Register with Immediate
13333 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13334 %{
13335 predicate(!UseAPX);
13336 match(Set dst (OrI dst src));
13337 effect(KILL cr);
13338 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13339
13340 format %{ "orl $dst, $src\t# int" %}
13341 ins_encode %{
13342 __ orl($dst$$Register, $src$$constant);
13343 %}
13344 ins_pipe(ialu_reg);
13345 %}
13346
13347 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13348 %{
13349 predicate(UseAPX);
13350 match(Set dst (OrI src1 src2));
13351 effect(KILL cr);
13352 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13353
13354 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13355 ins_encode %{
13356 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13357 %}
13358 ins_pipe(ialu_reg);
13359 %}
13360
13361 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13362 %{
13363 predicate(UseAPX);
13364 match(Set dst (OrI src1 src2));
13365 effect(KILL cr);
13366 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13367
13368 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13369 ins_encode %{
13370 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13371 %}
13372 ins_pipe(ialu_reg);
13373 %}
13374
13375 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13376 %{
13377 predicate(UseAPX);
13378 match(Set dst (OrI (LoadI src1) src2));
13379 effect(KILL cr);
13380 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13381
13382 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13383 ins_encode %{
13384 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13385 %}
13386 ins_pipe(ialu_reg);
13387 %}
13388
13389 // Or Register with Memory
13390 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13391 %{
13392 predicate(!UseAPX);
13393 match(Set dst (OrI dst (LoadI src)));
13394 effect(KILL cr);
13395 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13396
13397 ins_cost(150);
13398 format %{ "orl $dst, $src\t# int" %}
13399 ins_encode %{
13400 __ orl($dst$$Register, $src$$Address);
13401 %}
13402 ins_pipe(ialu_reg_mem);
13403 %}
13404
13405 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13406 %{
13407 predicate(UseAPX);
13408 match(Set dst (OrI src1 (LoadI src2)));
13409 effect(KILL cr);
13410 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13411
13412 ins_cost(150);
13413 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13414 ins_encode %{
13415 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13416 %}
13417 ins_pipe(ialu_reg_mem);
13418 %}
13419
13420 // Or Memory with Register
13421 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13422 %{
13423 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13424 effect(KILL cr);
13425 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13426
13427 ins_cost(150);
13428 format %{ "orb $dst, $src\t# byte" %}
13429 ins_encode %{
13430 __ orb($dst$$Address, $src$$Register);
13431 %}
13432 ins_pipe(ialu_mem_reg);
13433 %}
13434
13435 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13436 %{
13437 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13438 effect(KILL cr);
13439 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13440
13441 ins_cost(150);
13442 format %{ "orl $dst, $src\t# int" %}
13443 ins_encode %{
13444 __ orl($dst$$Address, $src$$Register);
13445 %}
13446 ins_pipe(ialu_mem_reg);
13447 %}
13448
13449 // Or Memory with Immediate
13450 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13451 %{
13452 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13453 effect(KILL cr);
13454 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13455
13456 ins_cost(125);
13457 format %{ "orl $dst, $src\t# int" %}
13458 ins_encode %{
13459 __ orl($dst$$Address, $src$$constant);
13460 %}
13461 ins_pipe(ialu_mem_imm);
13462 %}
13463
13464 // Xor Instructions
13465 // Xor Register with Register
13466 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13467 %{
13468 predicate(!UseAPX);
13469 match(Set dst (XorI dst src));
13470 effect(KILL cr);
13471 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13472
13473 format %{ "xorl $dst, $src\t# int" %}
13474 ins_encode %{
13475 __ xorl($dst$$Register, $src$$Register);
13476 %}
13477 ins_pipe(ialu_reg_reg);
13478 %}
13479
13480 // Xor Register with Register using New Data Destination (NDD)
13481 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13482 %{
13483 predicate(UseAPX);
13484 match(Set dst (XorI src1 src2));
13485 effect(KILL cr);
13486 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13487
13488 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13489 ins_encode %{
13490 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13491 %}
13492 ins_pipe(ialu_reg_reg);
13493 %}
13494
13495 // Xor Register with Immediate -1
13496 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13497 %{
13498 predicate(!UseAPX);
13499 match(Set dst (XorI dst imm));
13500
13501 format %{ "notl $dst" %}
13502 ins_encode %{
13503 __ notl($dst$$Register);
13504 %}
13505 ins_pipe(ialu_reg);
13506 %}
13507
13508 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13509 %{
13510 match(Set dst (XorI src imm));
13511 predicate(UseAPX);
13512 flag(PD::Flag_ndd_demotable);
13513
13514 format %{ "enotl $dst, $src" %}
13515 ins_encode %{
13516 __ enotl($dst$$Register, $src$$Register);
13517 %}
13518 ins_pipe(ialu_reg);
13519 %}
13520
13521 // Xor Register with Immediate
13522 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13523 %{
// Exclude immI src == -1 so that xorI_rReg_im1 is always selected for that case, independent of cost.
13525 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13526 match(Set dst (XorI dst src));
13527 effect(KILL cr);
13528 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13529
13530 format %{ "xorl $dst, $src\t# int" %}
13531 ins_encode %{
13532 __ xorl($dst$$Register, $src$$constant);
13533 %}
13534 ins_pipe(ialu_reg);
13535 %}
13536
13537 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13538 %{
// Exclude immI src2 == -1 so that xorI_rReg_im1_ndd is always selected for that case, independent of cost.
13540 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13541 match(Set dst (XorI src1 src2));
13542 effect(KILL cr);
13543 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13544
13545 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13546 ins_encode %{
13547 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13548 %}
13549 ins_pipe(ialu_reg);
13550 %}
13551
// Xor Memory with Immediate into Register
13553 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13554 %{
13555 predicate(UseAPX);
13556 match(Set dst (XorI (LoadI src1) src2));
13557 effect(KILL cr);
13558 ins_cost(150);
13559 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13560
13561 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13562 ins_encode %{
13563 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13564 %}
13565 ins_pipe(ialu_reg);
13566 %}
13567
13568 // Xor Register with Memory
13569 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13570 %{
13571 predicate(!UseAPX);
13572 match(Set dst (XorI dst (LoadI src)));
13573 effect(KILL cr);
13574 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13575
13576 ins_cost(150);
13577 format %{ "xorl $dst, $src\t# int" %}
13578 ins_encode %{
13579 __ xorl($dst$$Register, $src$$Address);
13580 %}
13581 ins_pipe(ialu_reg_mem);
13582 %}
13583
13584 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13585 %{
13586 predicate(UseAPX);
13587 match(Set dst (XorI src1 (LoadI src2)));
13588 effect(KILL cr);
13589 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13590
13591 ins_cost(150);
13592 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13593 ins_encode %{
13594 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13595 %}
13596 ins_pipe(ialu_reg_mem);
13597 %}
13598
13599 // Xor Memory with Register
13600 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13601 %{
13602 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13603 effect(KILL cr);
13604 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13605
13606 ins_cost(150);
13607 format %{ "xorb $dst, $src\t# byte" %}
13608 ins_encode %{
13609 __ xorb($dst$$Address, $src$$Register);
13610 %}
13611 ins_pipe(ialu_mem_reg);
13612 %}
13613
13614 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13615 %{
13616 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13617 effect(KILL cr);
13618 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13619
13620 ins_cost(150);
13621 format %{ "xorl $dst, $src\t# int" %}
13622 ins_encode %{
13623 __ xorl($dst$$Address, $src$$Register);
13624 %}
13625 ins_pipe(ialu_mem_reg);
13626 %}
13627
13628 // Xor Memory with Immediate
13629 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13630 %{
13631 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13632 effect(KILL cr);
13633 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13634
13635 ins_cost(125);
13636 format %{ "xorl $dst, $src\t# int" %}
13637 ins_encode %{
13638 __ xorl($dst$$Address, $src$$constant);
13639 %}
13640 ins_pipe(ialu_mem_imm);
13641 %}
13642
13643
13644 // Long Logical Instructions
13645
13646 // And Instructions
13647 // And Register with Register
13648 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13649 %{
13650 predicate(!UseAPX);
13651 match(Set dst (AndL dst src));
13652 effect(KILL cr);
13653 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13654
13655 format %{ "andq $dst, $src\t# long" %}
13656 ins_encode %{
13657 __ andq($dst$$Register, $src$$Register);
13658 %}
13659 ins_pipe(ialu_reg_reg);
13660 %}
13661
13662 // And Register with Register using New Data Destination (NDD)
13663 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13664 %{
13665 predicate(UseAPX);
13666 match(Set dst (AndL src1 src2));
13667 effect(KILL cr);
13668 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13669
13670 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13671 ins_encode %{
13672 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13674 %}
13675 ins_pipe(ialu_reg_reg);
13676 %}
13677
13678 // And Register with Immediate 255
13679 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13680 %{
13681 match(Set dst (AndL src mask));
13682
13683 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13684 ins_encode %{
// movzbl zeroes the upper 32 bits and does not need REX.W
13686 __ movzbl($dst$$Register, $src$$Register);
13687 %}
13688 ins_pipe(ialu_reg);
13689 %}
13690
13691 // And Register with Immediate 65535
13692 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13693 %{
13694 match(Set dst (AndL src mask));
13695
13696 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13697 ins_encode %{
// movzwl zeroes the upper 32 bits and does not need REX.W
13699 __ movzwl($dst$$Register, $src$$Register);
13700 %}
13701 ins_pipe(ialu_reg);
13702 %}
13703
13704 // And Register with Immediate
13705 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13706 %{
13707 predicate(!UseAPX);
13708 match(Set dst (AndL dst src));
13709 effect(KILL cr);
13710 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13711
13712 format %{ "andq $dst, $src\t# long" %}
13713 ins_encode %{
13714 __ andq($dst$$Register, $src$$constant);
13715 %}
13716 ins_pipe(ialu_reg);
13717 %}
13718
13719 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13720 %{
13721 predicate(UseAPX);
13722 match(Set dst (AndL src1 src2));
13723 effect(KILL cr);
13724 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13725
13726 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13727 ins_encode %{
13728 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13729 %}
13730 ins_pipe(ialu_reg);
13731 %}
13732
13733 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13734 %{
13735 predicate(UseAPX);
13736 match(Set dst (AndL (LoadL src1) src2));
13737 effect(KILL cr);
13738 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13739
13740 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13741 ins_encode %{
13742 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13743 %}
13744 ins_pipe(ialu_reg);
13745 %}
13746
13747 // And Register with Memory
13748 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13749 %{
13750 predicate(!UseAPX);
13751 match(Set dst (AndL dst (LoadL src)));
13752 effect(KILL cr);
13753 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13754
13755 ins_cost(150);
13756 format %{ "andq $dst, $src\t# long" %}
13757 ins_encode %{
13758 __ andq($dst$$Register, $src$$Address);
13759 %}
13760 ins_pipe(ialu_reg_mem);
13761 %}
13762
13763 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13764 %{
13765 predicate(UseAPX);
13766 match(Set dst (AndL src1 (LoadL src2)));
13767 effect(KILL cr);
13768 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13769
13770 ins_cost(150);
13771 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13772 ins_encode %{
13773 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13774 %}
13775 ins_pipe(ialu_reg_mem);
13776 %}
13777
13778 // And Memory with Register
13779 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13780 %{
13781 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13782 effect(KILL cr);
13783 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13784
13785 ins_cost(150);
13786 format %{ "andq $dst, $src\t# long" %}
13787 ins_encode %{
13788 __ andq($dst$$Address, $src$$Register);
13789 %}
13790 ins_pipe(ialu_mem_reg);
13791 %}
13792
13793 // And Memory with Immediate
13794 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13795 %{
13796 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13797 effect(KILL cr);
13798 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13799
13800 ins_cost(125);
13801 format %{ "andq $dst, $src\t# long" %}
13802 ins_encode %{
13803 __ andq($dst$$Address, $src$$constant);
13804 %}
13805 ins_pipe(ialu_mem_imm);
13806 %}
13807
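// Clear a single bit in memory with BTR: when the AND mask is the complement
// of a power of two, resetting that one bit directly avoids materializing the
// full 64-bit mask in a register.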
13808 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13809 %{
  // con must be a genuine 64-bit immediate whose complement is a power of 2,
  // since a plain AND/OR immediate already handles 8/32-bit values well.
13812 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13813
13814 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13815 effect(KILL cr);
13816
13817 ins_cost(125);
13818 format %{ "btrq $dst, log2(not($con))\t# long" %}
13819 ins_encode %{
13820 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13821 %}
13822 ins_pipe(ialu_mem_imm);
13823 %}
13824
13825 // BMI1 instructions
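// andn computes dst = ~src1 & src2 in a single instruction, which is why the
// matcher looks for an XOR with -1 (bitwise NOT) feeding an AND.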
13826 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13827 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13828 predicate(UseBMI1Instructions);
13829 effect(KILL cr);
13830 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13831
13832 ins_cost(125);
13833 format %{ "andnq $dst, $src1, $src2" %}
13834
13835 ins_encode %{
13836 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13837 %}
13838 ins_pipe(ialu_reg_mem);
13839 %}
13840
13841 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13842 match(Set dst (AndL (XorL src1 minus_1) src2));
13843 predicate(UseBMI1Instructions);
13844 effect(KILL cr);
13845 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13846
13847 format %{ "andnq $dst, $src1, $src2" %}
13848
13849 ins_encode %{
13850 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13851 %}
13852 ins_pipe(ialu_reg_mem);
13853 %}
13854
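// blsi extracts the lowest set bit: dst = src & -src, matched here as
// (AndL (SubL 0 src) src).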
13855 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13856 match(Set dst (AndL (SubL imm_zero src) src));
13857 predicate(UseBMI1Instructions);
13858 effect(KILL cr);
13859 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13860
13861 format %{ "blsiq $dst, $src" %}
13862
13863 ins_encode %{
13864 __ blsiq($dst$$Register, $src$$Register);
13865 %}
13866 ins_pipe(ialu_reg);
13867 %}
13868
13869 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13870 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13871 predicate(UseBMI1Instructions);
13872 effect(KILL cr);
13873 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13874
13875 ins_cost(125);
13876 format %{ "blsiq $dst, $src" %}
13877
13878 ins_encode %{
13879 __ blsiq($dst$$Register, $src$$Address);
13880 %}
13881 ins_pipe(ialu_reg_mem);
13882 %}
13883
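// blsmsk builds a mask up to and including the lowest set bit:
// dst = src ^ (src - 1), matched as (XorL (AddL src -1) src).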
13884 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13885 %{
13886 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13887 predicate(UseBMI1Instructions);
13888 effect(KILL cr);
13889 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13890
13891 ins_cost(125);
13892 format %{ "blsmskq $dst, $src" %}
13893
13894 ins_encode %{
13895 __ blsmskq($dst$$Register, $src$$Address);
13896 %}
13897 ins_pipe(ialu_reg_mem);
13898 %}
13899
13900 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13901 %{
13902 match(Set dst (XorL (AddL src minus_1) src));
13903 predicate(UseBMI1Instructions);
13904 effect(KILL cr);
13905 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13906
13907 format %{ "blsmskq $dst, $src" %}
13908
13909 ins_encode %{
13910 __ blsmskq($dst$$Register, $src$$Register);
13911 %}
13912
13913 ins_pipe(ialu_reg);
13914 %}
13915
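// blsr clears the lowest set bit: dst = src & (src - 1), matched as
// (AndL (AddL src -1) src).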
13916 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13917 %{
13918 match(Set dst (AndL (AddL src minus_1) src) );
13919 predicate(UseBMI1Instructions);
13920 effect(KILL cr);
13921 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13922
13923 format %{ "blsrq $dst, $src" %}
13924
13925 ins_encode %{
13926 __ blsrq($dst$$Register, $src$$Register);
13927 %}
13928
13929 ins_pipe(ialu_reg);
13930 %}
13931
13932 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13933 %{
13934 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13935 predicate(UseBMI1Instructions);
13936 effect(KILL cr);
13937 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13938
13939 ins_cost(125);
13940 format %{ "blsrq $dst, $src" %}
13941
13942 ins_encode %{
13943 __ blsrq($dst$$Register, $src$$Address);
13944 %}
13945
13946 ins_pipe(ialu_reg);
13947 %}
13948
13949 // Or Instructions
13950 // Or Register with Register
13951 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13952 %{
13953 predicate(!UseAPX);
13954 match(Set dst (OrL dst src));
13955 effect(KILL cr);
13956 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13957
13958 format %{ "orq $dst, $src\t# long" %}
13959 ins_encode %{
13960 __ orq($dst$$Register, $src$$Register);
13961 %}
13962 ins_pipe(ialu_reg_reg);
13963 %}
13964
13965 // Or Register with Register using New Data Destination (NDD)
13966 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13967 %{
13968 predicate(UseAPX);
13969 match(Set dst (OrL src1 src2));
13970 effect(KILL cr);
13971 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13972
13973 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13974 ins_encode %{
13975 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13978 ins_pipe(ialu_reg_reg);
13979 %}
13980
13981 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);
13985 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13986
13987 format %{ "orq $dst, $src\t# long" %}
13988 ins_encode %{
13989 __ orq($dst$$Register, $src$$Register);
13990 %}
13991 ins_pipe(ialu_reg_reg);
13992 %}
13993
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
13997 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13998
13999 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14000 ins_encode %{
14001 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14002 %}
14003 ins_pipe(ialu_reg_reg);
14004 %}
14005
14006 // Or Register with Immediate
14007 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14008 %{
14009 predicate(!UseAPX);
14010 match(Set dst (OrL dst src));
14011 effect(KILL cr);
14012 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14013
14014 format %{ "orq $dst, $src\t# long" %}
14015 ins_encode %{
14016 __ orq($dst$$Register, $src$$constant);
14017 %}
14018 ins_pipe(ialu_reg);
14019 %}
14020
14021 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14022 %{
14023 predicate(UseAPX);
14024 match(Set dst (OrL src1 src2));
14025 effect(KILL cr);
14026 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
14027
14028 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14029 ins_encode %{
14030 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14031 %}
14032 ins_pipe(ialu_reg);
14033 %}
14034
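// Commuted form of the rule above: the immediate arrives as the left input of
// the OrL, so the operands are swapped when emitting eorq.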
14035 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14036 %{
14037 predicate(UseAPX);
14038 match(Set dst (OrL src1 src2));
14039 effect(KILL cr);
14040 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
14041
14042 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14043 ins_encode %{
14044 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14045 %}
14046 ins_pipe(ialu_reg);
14047 %}
14048
// Or Memory operand with Immediate into Register (NDD)
14050 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14051 %{
14052 predicate(UseAPX);
14053 match(Set dst (OrL (LoadL src1) src2));
14054 effect(KILL cr);
14055 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14056
14057 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14058 ins_encode %{
14059 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14060 %}
14061 ins_pipe(ialu_reg);
14062 %}
14063
14064 // Or Register with Memory
14065 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14066 %{
14067 predicate(!UseAPX);
14068 match(Set dst (OrL dst (LoadL src)));
14069 effect(KILL cr);
14070 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14071
14072 ins_cost(150);
14073 format %{ "orq $dst, $src\t# long" %}
14074 ins_encode %{
14075 __ orq($dst$$Register, $src$$Address);
14076 %}
14077 ins_pipe(ialu_reg_mem);
14078 %}
14079
14080 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14081 %{
14082 predicate(UseAPX);
14083 match(Set dst (OrL src1 (LoadL src2)));
14084 effect(KILL cr);
14085 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14086
14087 ins_cost(150);
14088 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14089 ins_encode %{
14090 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14091 %}
14092 ins_pipe(ialu_reg_mem);
14093 %}
14094
14095 // Or Memory with Register
14096 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14097 %{
14098 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14099 effect(KILL cr);
14100 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14101
14102 ins_cost(150);
14103 format %{ "orq $dst, $src\t# long" %}
14104 ins_encode %{
14105 __ orq($dst$$Address, $src$$Register);
14106 %}
14107 ins_pipe(ialu_mem_reg);
14108 %}
14109
14110 // Or Memory with Immediate
14111 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14112 %{
14113 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14114 effect(KILL cr);
14115 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14116
14117 ins_cost(125);
14118 format %{ "orq $dst, $src\t# long" %}
14119 ins_encode %{
14120 __ orq($dst$$Address, $src$$constant);
14121 %}
14122 ins_pipe(ialu_mem_imm);
14123 %}
14124
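// Set a single bit in memory with BTS: the OR constant is a power of two that
// lies outside the sign-extended imm32 range, so setting the bit directly is
// cheaper than loading the 64-bit constant first.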
14125 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14126 %{
  // con must be a genuine 64-bit power-of-2 immediate,
  // since a plain AND/OR immediate already handles 8/32-bit values well.
14129 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14130
14131 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14132 effect(KILL cr);
14133
14134 ins_cost(125);
14135 format %{ "btsq $dst, log2($con)\t# long" %}
14136 ins_encode %{
14137 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14138 %}
14139 ins_pipe(ialu_mem_imm);
14140 %}
14141
14142 // Xor Instructions
14143 // Xor Register with Register
14144 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14145 %{
14146 predicate(!UseAPX);
14147 match(Set dst (XorL dst src));
14148 effect(KILL cr);
14149 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14150
14151 format %{ "xorq $dst, $src\t# long" %}
14152 ins_encode %{
14153 __ xorq($dst$$Register, $src$$Register);
14154 %}
14155 ins_pipe(ialu_reg_reg);
14156 %}
14157
14158 // Xor Register with Register using New Data Destination (NDD)
14159 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14160 %{
14161 predicate(UseAPX);
14162 match(Set dst (XorL src1 src2));
14163 effect(KILL cr);
14164 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14165
14166 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14167 ins_encode %{
14168 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14169 %}
14170 ins_pipe(ialu_reg_reg);
14171 %}
14172
14173 // Xor Register with Immediate -1
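// XOR with -1 is a bitwise NOT, so these rules emit notq/enotq; NOT leaves the
// flags untouched, hence no KILL of cr.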
14174 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14175 %{
14176 predicate(!UseAPX);
14177 match(Set dst (XorL dst imm));
14178
14179 format %{ "notq $dst" %}
14180 ins_encode %{
14181 __ notq($dst$$Register);
14182 %}
14183 ins_pipe(ialu_reg);
14184 %}
14185
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14187 %{
14188 predicate(UseAPX);
14189 match(Set dst (XorL src imm));
14190 flag(PD::Flag_ndd_demotable);
14191
14192 format %{ "enotq $dst, $src" %}
14193 ins_encode %{
14194 __ enotq($dst$$Register, $src$$Register);
14195 %}
14196 ins_pipe(ialu_reg);
14197 %}
14198
14199 // Xor Register with Immediate
14200 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14201 %{
  // The strict predicate excludes -1 so that xorL_rReg_im1 is selected
  // regardless of cost when immL32 src is -1.
14203 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14204 match(Set dst (XorL dst src));
14205 effect(KILL cr);
14206 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14207
14208 format %{ "xorq $dst, $src\t# long" %}
14209 ins_encode %{
14210 __ xorq($dst$$Register, $src$$constant);
14211 %}
14212 ins_pipe(ialu_reg);
14213 %}
14214
14215 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14216 %{
  // The strict predicate excludes -1 so that xorL_rReg_im1_ndd is selected
  // regardless of cost when immL32 src2 is -1.
14218 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14219 match(Set dst (XorL src1 src2));
14220 effect(KILL cr);
14221 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
14222
14223 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14224 ins_encode %{
14225 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14226 %}
14227 ins_pipe(ialu_reg);
14228 %}
14229
// Xor Memory operand with Immediate into Register (NDD)
14231 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14232 %{
14233 predicate(UseAPX);
14234 match(Set dst (XorL (LoadL src1) src2));
14235 effect(KILL cr);
14236 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14237 ins_cost(150);
14238
14239 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14240 ins_encode %{
14241 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14242 %}
14243 ins_pipe(ialu_reg);
14244 %}
14245
14246 // Xor Register with Memory
14247 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14248 %{
14249 predicate(!UseAPX);
14250 match(Set dst (XorL dst (LoadL src)));
14251 effect(KILL cr);
14252 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14253
14254 ins_cost(150);
14255 format %{ "xorq $dst, $src\t# long" %}
14256 ins_encode %{
14257 __ xorq($dst$$Register, $src$$Address);
14258 %}
14259 ins_pipe(ialu_reg_mem);
14260 %}
14261
14262 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14263 %{
14264 predicate(UseAPX);
14265 match(Set dst (XorL src1 (LoadL src2)));
14266 effect(KILL cr);
14267 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14268
14269 ins_cost(150);
14270 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14271 ins_encode %{
14272 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14273 %}
14274 ins_pipe(ialu_reg_mem);
14275 %}
14276
14277 // Xor Memory with Register
14278 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14279 %{
14280 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14281 effect(KILL cr);
14282 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14283
14284 ins_cost(150);
14285 format %{ "xorq $dst, $src\t# long" %}
14286 ins_encode %{
14287 __ xorq($dst$$Address, $src$$Register);
14288 %}
14289 ins_pipe(ialu_mem_reg);
14290 %}
14291
14292 // Xor Memory with Immediate
14293 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14294 %{
14295 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14296 effect(KILL cr);
14297 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14298
14299 ins_cost(125);
14300 format %{ "xorq $dst, $src\t# long" %}
14301 ins_encode %{
14302 __ xorq($dst$$Address, $src$$constant);
14303 %}
14304 ins_pipe(ialu_mem_imm);
14305 %}
14306
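// CmpLTMask yields all ones when p < q and zero otherwise: setcc materializes
// the 'less' condition as 0/1 and negl turns the 1 into -1.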
14307 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14308 %{
14309 match(Set dst (CmpLTMask p q));
14310 effect(KILL cr);
14311
14312 ins_cost(400);
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst\t# emits setlt + movzbl, or setzul for APX\n\t"
            "negl $dst" %}
14316 ins_encode %{
14317 __ cmpl($p$$Register, $q$$Register);
14318 __ setcc(Assembler::less, $dst$$Register);
14319 __ negl($dst$$Register);
14320 %}
14321 ins_pipe(pipe_slow);
14322 %}
14323
14324 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14325 %{
14326 match(Set dst (CmpLTMask dst zero));
14327 effect(KILL cr);
14328
14329 ins_cost(100);
14330 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14331 ins_encode %{
14332 __ sarl($dst$$Register, 31);
14333 %}
14334 ins_pipe(ialu_reg);
14335 %}
14336
// Better to save a register than avoid a branch.
14338 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14339 %{
14340 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14341 effect(KILL cr);
14342 ins_cost(300);
14343 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14344 "jge done\n\t"
14345 "addl $p,$y\n"
14346 "done: " %}
14347 ins_encode %{
14348 Register Rp = $p$$Register;
14349 Register Rq = $q$$Register;
14350 Register Ry = $y$$Register;
14351 Label done;
14352 __ subl(Rp, Rq);
14353 __ jccb(Assembler::greaterEqual, done);
14354 __ addl(Rp, Ry);
14355 __ bind(done);
14356 %}
14357 ins_pipe(pipe_cmplt);
14358 %}
14359
// Better to save a register than avoid a branch.
14361 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14362 %{
14363 match(Set y (AndI (CmpLTMask p q) y));
14364 effect(KILL cr);
14365
14366 ins_cost(300);
14367
14368 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jl done\n\t"
14370 "xorl $y, $y\n"
14371 "done: " %}
14372 ins_encode %{
14373 Register Rp = $p$$Register;
14374 Register Rq = $q$$Register;
14375 Register Ry = $y$$Register;
14376 Label done;
14377 __ cmpl(Rp, Rq);
14378 __ jccb(Assembler::less, done);
14379 __ xorl(Ry, Ry);
14380 __ bind(done);
14381 %}
14382 ins_pipe(pipe_cmplt);
14383 %}
14384
14385
14386 //---------- FP Instructions------------------------------------------------
14387
14388 // Really expensive, avoid
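// ucomiss/ucomisd set ZF, PF and CF for an unordered (NaN) operand; the
// pushfq/andq/popfq fixup clears ZF and PF while keeping CF, so an unordered
// result reads as strictly 'below'.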
14389 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14390 %{
14391 match(Set cr (CmpF src1 src2));
14392
14393 ins_cost(500);
14394 format %{ "ucomiss $src1, $src2\n\t"
14395 "jnp,s exit\n\t"
14396 "pushfq\t# saw NaN, set CF\n\t"
14397 "andq [rsp], #0xffffff2b\n\t"
14398 "popfq\n"
14399 "exit:" %}
14400 ins_encode %{
14401 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14402 emit_cmpfp_fixup(masm);
14403 %}
14404 ins_pipe(pipe_slow);
14405 %}
14406
14407 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14408 match(Set cr (CmpF src1 src2));
14409
14410 ins_cost(100);
14411 format %{ "ucomiss $src1, $src2" %}
14412 ins_encode %{
14413 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14414 %}
14415 ins_pipe(pipe_slow);
14416 %}
14417
14418 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14419 match(Set cr (CmpF src1 (LoadF src2)));
14420
14421 ins_cost(100);
14422 format %{ "ucomiss $src1, $src2" %}
14423 ins_encode %{
14424 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14425 %}
14426 ins_pipe(pipe_slow);
14427 %}
14428
14429 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14430 match(Set cr (CmpF src con));
14431 ins_cost(100);
14432 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14433 ins_encode %{
14434 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14435 %}
14436 ins_pipe(pipe_slow);
14437 %}
14438
14439 // Really expensive, avoid
14440 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14441 %{
14442 match(Set cr (CmpD src1 src2));
14443
14444 ins_cost(500);
14445 format %{ "ucomisd $src1, $src2\n\t"
14446 "jnp,s exit\n\t"
14447 "pushfq\t# saw NaN, set CF\n\t"
14448 "andq [rsp], #0xffffff2b\n\t"
14449 "popfq\n"
14450 "exit:" %}
14451 ins_encode %{
14452 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14453 emit_cmpfp_fixup(masm);
14454 %}
14455 ins_pipe(pipe_slow);
14456 %}
14457
14458 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14459 match(Set cr (CmpD src1 src2));
14460
14461 ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
14463 ins_encode %{
14464 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14465 %}
14466 ins_pipe(pipe_slow);
14467 %}
14468
14469 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14470 match(Set cr (CmpD src1 (LoadD src2)));
14471
14472 ins_cost(100);
14473 format %{ "ucomisd $src1, $src2" %}
14474 ins_encode %{
14475 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14476 %}
14477 ins_pipe(pipe_slow);
14478 %}
14479
14480 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14481 match(Set cr (CmpD src con));
14482 ins_cost(100);
14483 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14484 ins_encode %{
14485 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14486 %}
14487 ins_pipe(pipe_slow);
14488 %}
14489
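// The CmpF3/CmpD3 rules below compute the Java three-way result: -1 is
// preloaded, an unordered or below comparison keeps it, and otherwise
// setne/movzbl produces 0 or 1 (emit_cmpfp3 emits this shared tail).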
14490 // Compare into -1,0,1
14491 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14492 %{
14493 match(Set dst (CmpF3 src1 src2));
14494 effect(KILL cr);
14495
14496 ins_cost(275);
14497 format %{ "ucomiss $src1, $src2\n\t"
14498 "movl $dst, #-1\n\t"
14499 "jp,s done\n\t"
14500 "jb,s done\n\t"
14501 "setne $dst\n\t"
14502 "movzbl $dst, $dst\n"
14503 "done:" %}
14504 ins_encode %{
14505 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14506 emit_cmpfp3(masm, $dst$$Register);
14507 %}
14508 ins_pipe(pipe_slow);
14509 %}
14510
14511 // Compare into -1,0,1
14512 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14513 %{
14514 match(Set dst (CmpF3 src1 (LoadF src2)));
14515 effect(KILL cr);
14516
14517 ins_cost(275);
14518 format %{ "ucomiss $src1, $src2\n\t"
14519 "movl $dst, #-1\n\t"
14520 "jp,s done\n\t"
14521 "jb,s done\n\t"
14522 "setne $dst\n\t"
14523 "movzbl $dst, $dst\n"
14524 "done:" %}
14525 ins_encode %{
14526 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14527 emit_cmpfp3(masm, $dst$$Register);
14528 %}
14529 ins_pipe(pipe_slow);
14530 %}
14531
14532 // Compare into -1,0,1
14533 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14534 match(Set dst (CmpF3 src con));
14535 effect(KILL cr);
14536
14537 ins_cost(275);
14538 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14539 "movl $dst, #-1\n\t"
14540 "jp,s done\n\t"
14541 "jb,s done\n\t"
14542 "setne $dst\n\t"
14543 "movzbl $dst, $dst\n"
14544 "done:" %}
14545 ins_encode %{
14546 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14547 emit_cmpfp3(masm, $dst$$Register);
14548 %}
14549 ins_pipe(pipe_slow);
14550 %}
14551
14552 // Compare into -1,0,1
14553 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14554 %{
14555 match(Set dst (CmpD3 src1 src2));
14556 effect(KILL cr);
14557
14558 ins_cost(275);
14559 format %{ "ucomisd $src1, $src2\n\t"
14560 "movl $dst, #-1\n\t"
14561 "jp,s done\n\t"
14562 "jb,s done\n\t"
14563 "setne $dst\n\t"
14564 "movzbl $dst, $dst\n"
14565 "done:" %}
14566 ins_encode %{
14567 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14568 emit_cmpfp3(masm, $dst$$Register);
14569 %}
14570 ins_pipe(pipe_slow);
14571 %}
14572
14573 // Compare into -1,0,1
14574 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14575 %{
14576 match(Set dst (CmpD3 src1 (LoadD src2)));
14577 effect(KILL cr);
14578
14579 ins_cost(275);
14580 format %{ "ucomisd $src1, $src2\n\t"
14581 "movl $dst, #-1\n\t"
14582 "jp,s done\n\t"
14583 "jb,s done\n\t"
14584 "setne $dst\n\t"
14585 "movzbl $dst, $dst\n"
14586 "done:" %}
14587 ins_encode %{
14588 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14589 emit_cmpfp3(masm, $dst$$Register);
14590 %}
14591 ins_pipe(pipe_slow);
14592 %}
14593
14594 // Compare into -1,0,1
14595 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14596 match(Set dst (CmpD3 src con));
14597 effect(KILL cr);
14598
14599 ins_cost(275);
14600 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14601 "movl $dst, #-1\n\t"
14602 "jp,s done\n\t"
14603 "jb,s done\n\t"
14604 "setne $dst\n\t"
14605 "movzbl $dst, $dst\n"
14606 "done:" %}
14607 ins_encode %{
14608 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14609 emit_cmpfp3(masm, $dst$$Register);
14610 %}
14611 ins_pipe(pipe_slow);
14612 %}
14613
14614 //----------Arithmetic Conversion Instructions---------------------------------
14615
14616 instruct convF2D_reg_reg(regD dst, regF src)
14617 %{
14618 match(Set dst (ConvF2D src));
14619
14620 format %{ "cvtss2sd $dst, $src" %}
14621 ins_encode %{
14622 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14623 %}
14624 ins_pipe(pipe_slow); // XXX
14625 %}
14626
14627 instruct convF2D_reg_mem(regD dst, memory src)
14628 %{
14629 predicate(UseAVX == 0);
14630 match(Set dst (ConvF2D (LoadF src)));
14631
14632 format %{ "cvtss2sd $dst, $src" %}
14633 ins_encode %{
14634 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14635 %}
14636 ins_pipe(pipe_slow); // XXX
14637 %}
14638
14639 instruct convD2F_reg_reg(regF dst, regD src)
14640 %{
14641 match(Set dst (ConvD2F src));
14642
14643 format %{ "cvtsd2ss $dst, $src" %}
14644 ins_encode %{
14645 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14646 %}
14647 ins_pipe(pipe_slow); // XXX
14648 %}
14649
14650 instruct convD2F_reg_mem(regF dst, memory src)
14651 %{
14652 predicate(UseAVX == 0);
14653 match(Set dst (ConvD2F (LoadD src)));
14654
14655 format %{ "cvtsd2ss $dst, $src" %}
14656 ins_encode %{
14657 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14658 %}
14659 ins_pipe(pipe_slow); // XXX
14660 %}
14661
14662 // XXX do mem variants
14663 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14664 %{
14665 predicate(!VM_Version::supports_avx10_2());
14666 match(Set dst (ConvF2I src));
14667 effect(KILL cr);
14668 format %{ "convert_f2i $dst, $src" %}
14669 ins_encode %{
14670 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14671 %}
14672 ins_pipe(pipe_slow);
14673 %}
14674
14675 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14676 %{
14677 predicate(VM_Version::supports_avx10_2());
14678 match(Set dst (ConvF2I src));
14679 format %{ "evcvttss2sisl $dst, $src" %}
14680 ins_encode %{
14681 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14682 %}
14683 ins_pipe(pipe_slow);
14684 %}
14685
14686 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14687 %{
14688 predicate(VM_Version::supports_avx10_2());
14689 match(Set dst (ConvF2I (LoadF src)));
14690 format %{ "evcvttss2sisl $dst, $src" %}
14691 ins_encode %{
14692 __ evcvttss2sisl($dst$$Register, $src$$Address);
14693 %}
14694 ins_pipe(pipe_slow);
14695 %}
14696
14697 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14698 %{
14699 predicate(!VM_Version::supports_avx10_2());
14700 match(Set dst (ConvF2L src));
14701 effect(KILL cr);
14702 format %{ "convert_f2l $dst, $src"%}
14703 ins_encode %{
14704 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14705 %}
14706 ins_pipe(pipe_slow);
14707 %}
14708
14709 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14710 %{
14711 predicate(VM_Version::supports_avx10_2());
14712 match(Set dst (ConvF2L src));
14713 format %{ "evcvttss2sisq $dst, $src" %}
14714 ins_encode %{
14715 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14716 %}
14717 ins_pipe(pipe_slow);
14718 %}
14719
14720 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14721 %{
14722 predicate(VM_Version::supports_avx10_2());
14723 match(Set dst (ConvF2L (LoadF src)));
14724 format %{ "evcvttss2sisq $dst, $src" %}
14725 ins_encode %{
14726 __ evcvttss2sisq($dst$$Register, $src$$Address);
14727 %}
14728 ins_pipe(pipe_slow);
14729 %}
14730
14731 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14732 %{
14733 predicate(!VM_Version::supports_avx10_2());
14734 match(Set dst (ConvD2I src));
14735 effect(KILL cr);
14736 format %{ "convert_d2i $dst, $src"%}
14737 ins_encode %{
14738 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14739 %}
14740 ins_pipe(pipe_slow);
14741 %}
14742
14743 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14744 %{
14745 predicate(VM_Version::supports_avx10_2());
14746 match(Set dst (ConvD2I src));
14747 format %{ "evcvttsd2sisl $dst, $src" %}
14748 ins_encode %{
14749 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14750 %}
14751 ins_pipe(pipe_slow);
14752 %}
14753
14754 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14755 %{
14756 predicate(VM_Version::supports_avx10_2());
14757 match(Set dst (ConvD2I (LoadD src)));
14758 format %{ "evcvttsd2sisl $dst, $src" %}
14759 ins_encode %{
14760 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14761 %}
14762 ins_pipe(pipe_slow);
14763 %}
14764
14765 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14766 %{
14767 predicate(!VM_Version::supports_avx10_2());
14768 match(Set dst (ConvD2L src));
14769 effect(KILL cr);
14770 format %{ "convert_d2l $dst, $src"%}
14771 ins_encode %{
14772 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14773 %}
14774 ins_pipe(pipe_slow);
14775 %}
14776
14777 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14778 %{
14779 predicate(VM_Version::supports_avx10_2());
14780 match(Set dst (ConvD2L src));
14781 format %{ "evcvttsd2sisq $dst, $src" %}
14782 ins_encode %{
14783 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14784 %}
14785 ins_pipe(pipe_slow);
14786 %}
14787
14788 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14789 %{
14790 predicate(VM_Version::supports_avx10_2());
14791 match(Set dst (ConvD2L (LoadD src)));
14792 format %{ "evcvttsd2sisq $dst, $src" %}
14793 ins_encode %{
14794 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14795 %}
14796 ins_pipe(pipe_slow);
14797 %}
14798
14799 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14800 %{
14801 match(Set dst (RoundD src));
14802 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14803 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14804 ins_encode %{
14805 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14806 %}
14807 ins_pipe(pipe_slow);
14808 %}
14809
14810 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14811 %{
14812 match(Set dst (RoundF src));
14813 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14814 format %{ "round_float $dst,$src" %}
14815 ins_encode %{
14816 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14817 %}
14818 ins_pipe(pipe_slow);
14819 %}
14820
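// cvtsi2ss/cvtsi2sd merge into the low element and leave the rest of the XMM
// destination unchanged, creating a false dependency on its previous value;
// on AVX targets the destination is therefore zeroed with pxor first.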
14821 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14822 %{
14823 predicate(!UseXmmI2F);
14824 match(Set dst (ConvI2F src));
14825
14826 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14827 ins_encode %{
14828 if (UseAVX > 0) {
14829 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14830 }
14831 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14832 %}
14833 ins_pipe(pipe_slow); // XXX
14834 %}
14835
14836 instruct convI2F_reg_mem(regF dst, memory src)
14837 %{
14838 predicate(UseAVX == 0);
14839 match(Set dst (ConvI2F (LoadI src)));
14840
14841 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14842 ins_encode %{
14843 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14844 %}
14845 ins_pipe(pipe_slow); // XXX
14846 %}
14847
14848 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14849 %{
14850 predicate(!UseXmmI2D);
14851 match(Set dst (ConvI2D src));
14852
14853 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14854 ins_encode %{
14855 if (UseAVX > 0) {
14856 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14857 }
14858 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14859 %}
14860 ins_pipe(pipe_slow); // XXX
14861 %}
14862
14863 instruct convI2D_reg_mem(regD dst, memory src)
14864 %{
14865 predicate(UseAVX == 0);
14866 match(Set dst (ConvI2D (LoadI src)));
14867
14868 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14869 ins_encode %{
14870 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14871 %}
14872 ins_pipe(pipe_slow); // XXX
14873 %}
14874
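// With UseXmmI2F/UseXmmI2D the int is moved into the XMM register with movdl
// and converted there with a packed convert, avoiding the cvtsi2ss/cvtsi2sd
// merge into a partially-live register.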
14875 instruct convXI2F_reg(regF dst, rRegI src)
14876 %{
14877 predicate(UseXmmI2F);
14878 match(Set dst (ConvI2F src));
14879
14880 format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
14882 ins_encode %{
14883 __ movdl($dst$$XMMRegister, $src$$Register);
14884 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14885 %}
14886 ins_pipe(pipe_slow); // XXX
14887 %}
14888
14889 instruct convXI2D_reg(regD dst, rRegI src)
14890 %{
14891 predicate(UseXmmI2D);
14892 match(Set dst (ConvI2D src));
14893
14894 format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
14896 ins_encode %{
14897 __ movdl($dst$$XMMRegister, $src$$Register);
14898 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14899 %}
14900 ins_pipe(pipe_slow); // XXX
14901 %}
14902
14903 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14904 %{
14905 match(Set dst (ConvL2F src));
14906
14907 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14908 ins_encode %{
14909 if (UseAVX > 0) {
14910 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14911 }
14912 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14913 %}
14914 ins_pipe(pipe_slow); // XXX
14915 %}
14916
14917 instruct convL2F_reg_mem(regF dst, memory src)
14918 %{
14919 predicate(UseAVX == 0);
14920 match(Set dst (ConvL2F (LoadL src)));
14921
14922 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14923 ins_encode %{
14924 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14925 %}
14926 ins_pipe(pipe_slow); // XXX
14927 %}
14928
14929 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14930 %{
14931 match(Set dst (ConvL2D src));
14932
14933 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14934 ins_encode %{
14935 if (UseAVX > 0) {
14936 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14937 }
14938 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14939 %}
14940 ins_pipe(pipe_slow); // XXX
14941 %}
14942
14943 instruct convL2D_reg_mem(regD dst, memory src)
14944 %{
14945 predicate(UseAVX == 0);
14946 match(Set dst (ConvL2D (LoadL src)));
14947
14948 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14949 ins_encode %{
14950 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14951 %}
14952 ins_pipe(pipe_slow); // XXX
14953 %}
14954
14955 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14956 %{
14957 match(Set dst (ConvI2L src));
14958
14959 ins_cost(125);
14960 format %{ "movslq $dst, $src\t# i2l" %}
14961 ins_encode %{
14962 __ movslq($dst$$Register, $src$$Register);
14963 %}
14964 ins_pipe(ialu_reg_reg);
14965 %}
14966
14967 // Zero-extend convert int to long
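// On x86-64 a 32-bit movl implicitly zeroes bits 63:32 of the destination, so
// (AndL (ConvI2L src) 0xFFFFFFFF) needs no explicit AND; the move is elided
// entirely when dst and src are already the same register.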
14968 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14969 %{
14970 match(Set dst (AndL (ConvI2L src) mask));
14971
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
14973 ins_encode %{
14974 if ($dst$$reg != $src$$reg) {
14975 __ movl($dst$$Register, $src$$Register);
14976 }
14977 %}
14978 ins_pipe(ialu_reg_reg);
14979 %}
14980
14981 // Zero-extend convert int to long
14982 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14983 %{
14984 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14985
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
14987 ins_encode %{
14988 __ movl($dst$$Register, $src$$Address);
14989 %}
14990 ins_pipe(ialu_reg_mem);
14991 %}
14992
14993 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14994 %{
14995 match(Set dst (AndL src mask));
14996
14997 format %{ "movl $dst, $src\t# zero-extend long" %}
14998 ins_encode %{
14999 __ movl($dst$$Register, $src$$Register);
15000 %}
15001 ins_pipe(ialu_reg_reg);
15002 %}
15003
15004 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15005 %{
15006 match(Set dst (ConvL2I src));
15007
15008 format %{ "movl $dst, $src\t# l2i" %}
15009 ins_encode %{
15010 __ movl($dst$$Register, $src$$Register);
15011 %}
15012 ins_pipe(ialu_reg_reg);
15013 %}
15014
15015
15016 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15017 match(Set dst (MoveF2I src));
15018 effect(DEF dst, USE src);
15019
15020 ins_cost(125);
15021 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15022 ins_encode %{
15023 __ movl($dst$$Register, Address(rsp, $src$$disp));
15024 %}
15025 ins_pipe(ialu_reg_mem);
15026 %}
15027
15028 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15029 match(Set dst (MoveI2F src));
15030 effect(DEF dst, USE src);
15031
15032 ins_cost(125);
15033 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15034 ins_encode %{
15035 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15036 %}
15037 ins_pipe(pipe_slow);
15038 %}
15039
15040 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15041 match(Set dst (MoveD2L src));
15042 effect(DEF dst, USE src);
15043
15044 ins_cost(125);
15045 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15046 ins_encode %{
15047 __ movq($dst$$Register, Address(rsp, $src$$disp));
15048 %}
15049 ins_pipe(ialu_reg_mem);
15050 %}
15051
15052 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15053 predicate(!UseXmmLoadAndClearUpper);
15054 match(Set dst (MoveL2D src));
15055 effect(DEF dst, USE src);
15056
15057 ins_cost(125);
15058 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15059 ins_encode %{
15060 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15061 %}
15062 ins_pipe(pipe_slow);
15063 %}
15064
15065 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15066 predicate(UseXmmLoadAndClearUpper);
15067 match(Set dst (MoveL2D src));
15068 effect(DEF dst, USE src);
15069
15070 ins_cost(125);
15071 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15072 ins_encode %{
15073 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15074 %}
15075 ins_pipe(pipe_slow);
15076 %}
15077
15078
15079 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15080 match(Set dst (MoveF2I src));
15081 effect(DEF dst, USE src);
15082
15083 ins_cost(95); // XXX
15084 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15085 ins_encode %{
15086 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15087 %}
15088 ins_pipe(pipe_slow);
15089 %}
15090
15091 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15092 match(Set dst (MoveI2F src));
15093 effect(DEF dst, USE src);
15094
15095 ins_cost(100);
15096 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15097 ins_encode %{
15098 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15099 %}
15100 ins_pipe( ialu_mem_reg );
15101 %}
15102
15103 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15104 match(Set dst (MoveD2L src));
15105 effect(DEF dst, USE src);
15106
15107 ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
15109 ins_encode %{
15110 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15111 %}
15112 ins_pipe(pipe_slow);
15113 %}
15114
15115 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15116 match(Set dst (MoveL2D src));
15117 effect(DEF dst, USE src);
15118
15119 ins_cost(100);
15120 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15121 ins_encode %{
15122 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15123 %}
15124 ins_pipe(ialu_mem_reg);
15125 %}
15126
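// Direct GPR<->XMM bit moves: movd/movdq reinterpret the raw bits without a
// trip through memory, which is why these rules are costed below the
// stack-slot variants above.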
15127 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15128 match(Set dst (MoveF2I src));
15129 effect(DEF dst, USE src);
15130 ins_cost(85);
15131 format %{ "movd $dst,$src\t# MoveF2I" %}
15132 ins_encode %{
15133 __ movdl($dst$$Register, $src$$XMMRegister);
15134 %}
15135 ins_pipe( pipe_slow );
15136 %}
15137
15138 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15139 match(Set dst (MoveD2L src));
15140 effect(DEF dst, USE src);
15141 ins_cost(85);
15142 format %{ "movd $dst,$src\t# MoveD2L" %}
15143 ins_encode %{
15144 __ movdq($dst$$Register, $src$$XMMRegister);
15145 %}
15146 ins_pipe( pipe_slow );
15147 %}
15148
15149 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15150 match(Set dst (MoveI2F src));
15151 effect(DEF dst, USE src);
15152 ins_cost(100);
15153 format %{ "movd $dst,$src\t# MoveI2F" %}
15154 ins_encode %{
15155 __ movdl($dst$$XMMRegister, $src$$Register);
15156 %}
15157 ins_pipe( pipe_slow );
15158 %}
15159
15160 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15161 match(Set dst (MoveL2D src));
15162 effect(DEF dst, USE src);
15163 ins_cost(100);
15164 format %{ "movd $dst,$src\t# MoveL2D" %}
15165 ins_encode %{
15166 __ movdq($dst$$XMMRegister, $src$$Register);
15167 %}
15168 ins_pipe( pipe_slow );
15169 %}
15170
15171
15172 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
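// All ClearArray rules funnel into MacroAssembler::clear_mem; the predicates
// select a variant by size (is_large), element-store requirement
// (word_copy_only) and AVX level, and the format templates only mirror the
// code clear_mem may emit.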
15174 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15175 Universe dummy, rFlagsReg cr)
15176 %{
15177 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15178 match(Set dummy (ClearArray (Binary cnt base) val));
15179 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15180
15181 format %{ $$template
15182 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15183 $$emit$$"jg LARGE\n\t"
15184 $$emit$$"dec rcx\n\t"
15185 $$emit$$"js DONE\t# Zero length\n\t"
15186 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15187 $$emit$$"dec rcx\n\t"
15188 $$emit$$"jge LOOP\n\t"
15189 $$emit$$"jmp DONE\n\t"
15190 $$emit$$"# LARGE:\n\t"
15191 if (UseFastStosb) {
15192 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15193 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15194 } else if (UseXMMForObjInit) {
15195 $$emit$$"movdq $tmp, $val\n\t"
15196 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15197 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15198 $$emit$$"jmpq L_zero_64_bytes\n\t"
15199 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15200 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15201 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15202 $$emit$$"add 0x40,rax\n\t"
15203 $$emit$$"# L_zero_64_bytes:\n\t"
15204 $$emit$$"sub 0x8,rcx\n\t"
15205 $$emit$$"jge L_loop\n\t"
15206 $$emit$$"add 0x4,rcx\n\t"
15207 $$emit$$"jl L_tail\n\t"
15208 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15209 $$emit$$"add 0x20,rax\n\t"
15210 $$emit$$"sub 0x4,rcx\n\t"
15211 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15212 $$emit$$"add 0x4,rcx\n\t"
15213 $$emit$$"jle L_end\n\t"
15214 $$emit$$"dec rcx\n\t"
15215 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15216 $$emit$$"vmovq xmm0,(rax)\n\t"
15217 $$emit$$"add 0x8,rax\n\t"
15218 $$emit$$"dec rcx\n\t"
15219 $$emit$$"jge L_sloop\n\t"
15220 $$emit$$"# L_end:\n\t"
15221 } else {
15222 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15223 }
15224 $$emit$$"# DONE"
15225 %}
15226 ins_encode %{
15227 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15228 $tmp$$XMMRegister, false, false);
15229 %}
15230 ins_pipe(pipe_slow);
15231 %}
15232
15233 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15234 Universe dummy, rFlagsReg cr)
15235 %{
15236 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15237 match(Set dummy (ClearArray (Binary cnt base) val));
15238 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15239
15240 format %{ $$template
15241 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15242 $$emit$$"jg LARGE\n\t"
15243 $$emit$$"dec rcx\n\t"
15244 $$emit$$"js DONE\t# Zero length\n\t"
15245 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15246 $$emit$$"dec rcx\n\t"
15247 $$emit$$"jge LOOP\n\t"
15248 $$emit$$"jmp DONE\n\t"
15249 $$emit$$"# LARGE:\n\t"
15250 if (UseXMMForObjInit) {
15251 $$emit$$"movdq $tmp, $val\n\t"
15252 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15253 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15254 $$emit$$"jmpq L_zero_64_bytes\n\t"
15255 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15256 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15257 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15258 $$emit$$"add 0x40,rax\n\t"
15259 $$emit$$"# L_zero_64_bytes:\n\t"
15260 $$emit$$"sub 0x8,rcx\n\t"
15261 $$emit$$"jge L_loop\n\t"
15262 $$emit$$"add 0x4,rcx\n\t"
15263 $$emit$$"jl L_tail\n\t"
15264 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15265 $$emit$$"add 0x20,rax\n\t"
15266 $$emit$$"sub 0x4,rcx\n\t"
15267 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15268 $$emit$$"add 0x4,rcx\n\t"
15269 $$emit$$"jle L_end\n\t"
15270 $$emit$$"dec rcx\n\t"
15271 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15272 $$emit$$"vmovq xmm0,(rax)\n\t"
15273 $$emit$$"add 0x8,rax\n\t"
15274 $$emit$$"dec rcx\n\t"
15275 $$emit$$"jge L_sloop\n\t"
15276 $$emit$$"# L_end:\n\t"
15277 } else {
15278 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15279 }
15280 $$emit$$"# DONE"
15281 %}
15282 ins_encode %{
15283 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15284 $tmp$$XMMRegister, false, true);
15285 %}
15286 ins_pipe(pipe_slow);
15287 %}
15288
15289 // Small non-constant length ClearArray for AVX512 targets.
15290 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15291 Universe dummy, rFlagsReg cr)
15292 %{
15293 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15294 match(Set dummy (ClearArray (Binary cnt base) val));
15295 ins_cost(125);
15296 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15297
15298 format %{ $$template
15299 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15300 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15301 $$emit$$"jg LARGE\n\t"
15302 $$emit$$"dec rcx\n\t"
15303 $$emit$$"js DONE\t# Zero length\n\t"
15304 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15305 $$emit$$"dec rcx\n\t"
15306 $$emit$$"jge LOOP\n\t"
15307 $$emit$$"jmp DONE\n\t"
15308 $$emit$$"# LARGE:\n\t"
15309 if (UseFastStosb) {
15310 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15311 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15312 } else if (UseXMMForObjInit) {
15313 $$emit$$"mov rdi,rax\n\t"
15314 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15315 $$emit$$"jmpq L_zero_64_bytes\n\t"
15316 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15317 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15318 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15319 $$emit$$"add 0x40,rax\n\t"
15320 $$emit$$"# L_zero_64_bytes:\n\t"
15321 $$emit$$"sub 0x8,rcx\n\t"
15322 $$emit$$"jge L_loop\n\t"
15323 $$emit$$"add 0x4,rcx\n\t"
15324 $$emit$$"jl L_tail\n\t"
15325 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15326 $$emit$$"add 0x20,rax\n\t"
15327 $$emit$$"sub 0x4,rcx\n\t"
15328 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15329 $$emit$$"add 0x4,rcx\n\t"
15330 $$emit$$"jle L_end\n\t"
15331 $$emit$$"dec rcx\n\t"
15332 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15333 $$emit$$"vmovq xmm0,(rax)\n\t"
15334 $$emit$$"add 0x8,rax\n\t"
15335 $$emit$$"dec rcx\n\t"
15336 $$emit$$"jge L_sloop\n\t"
15337 $$emit$$"# L_end:\n\t"
15338 } else {
15339 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15340 }
15341 $$emit$$"# DONE"
15342 %}
15343 ins_encode %{
15344 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15345 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15346 %}
15347 ins_pipe(pipe_slow);
15348 %}
15349
15350 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15351 Universe dummy, rFlagsReg cr)
15352 %{
15353 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15354 match(Set dummy (ClearArray (Binary cnt base) val));
15355 ins_cost(125);
15356 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15357
15358 format %{ $$template
15359 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15360 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15361 $$emit$$"jg LARGE\n\t"
15362 $$emit$$"dec rcx\n\t"
15363 $$emit$$"js DONE\t# Zero length\n\t"
15364 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15365 $$emit$$"dec rcx\n\t"
15366 $$emit$$"jge LOOP\n\t"
15367 $$emit$$"jmp DONE\n\t"
15368 $$emit$$"# LARGE:\n\t"
15369 if (UseFastStosb) {
15370 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15371 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15372 } else if (UseXMMForObjInit) {
15373 $$emit$$"mov rdi,rax\n\t"
15374 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15375 $$emit$$"jmpq L_zero_64_bytes\n\t"
15376 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15377 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15378 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15379 $$emit$$"add 0x40,rax\n\t"
15380 $$emit$$"# L_zero_64_bytes:\n\t"
15381 $$emit$$"sub 0x8,rcx\n\t"
15382 $$emit$$"jge L_loop\n\t"
15383 $$emit$$"add 0x4,rcx\n\t"
15384 $$emit$$"jl L_tail\n\t"
15385 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15386 $$emit$$"add 0x20,rax\n\t"
15387 $$emit$$"sub 0x4,rcx\n\t"
15388 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15389 $$emit$$"add 0x4,rcx\n\t"
15390 $$emit$$"jle L_end\n\t"
15391 $$emit$$"dec rcx\n\t"
15392 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15393 $$emit$$"vmovq xmm0,(rax)\n\t"
15394 $$emit$$"add 0x8,rax\n\t"
15395 $$emit$$"dec rcx\n\t"
15396 $$emit$$"jge L_sloop\n\t"
15397 $$emit$$"# L_end:\n\t"
15398 } else {
15399 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15400 }
15401 $$emit$$"# DONE"
15402 %}
15403 ins_encode %{
15404 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15405 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15406 %}
15407 ins_pipe(pipe_slow);
15408 %}
15409
15410 // Large non-constant length ClearArray for non-AVX512 targets.
15411 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15412 Universe dummy, rFlagsReg cr)
15413 %{
15414 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15415 match(Set dummy (ClearArray (Binary cnt base) val));
15416 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15417
15418 format %{ $$template
15419 if (UseFastStosb) {
15420 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15421 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15422 } else if (UseXMMForObjInit) {
15423 $$emit$$"movdq $tmp, $val\n\t"
15424 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15425 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15426 $$emit$$"jmpq L_zero_64_bytes\n\t"
15427 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15428 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15429 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15430 $$emit$$"add 0x40,rax\n\t"
15431 $$emit$$"# L_zero_64_bytes:\n\t"
15432 $$emit$$"sub 0x8,rcx\n\t"
15433 $$emit$$"jge L_loop\n\t"
15434 $$emit$$"add 0x4,rcx\n\t"
15435 $$emit$$"jl L_tail\n\t"
15436 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15437 $$emit$$"add 0x20,rax\n\t"
15438 $$emit$$"sub 0x4,rcx\n\t"
15439 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15440 $$emit$$"add 0x4,rcx\n\t"
15441 $$emit$$"jle L_end\n\t"
15442 $$emit$$"dec rcx\n\t"
15443 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15444 $$emit$$"vmovq xmm0,(rax)\n\t"
15445 $$emit$$"add 0x8,rax\n\t"
15446 $$emit$$"dec rcx\n\t"
15447 $$emit$$"jge L_sloop\n\t"
15448 $$emit$$"# L_end:\n\t"
15449 } else {
15450 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15451 }
15452 %}
15453 ins_encode %{
15454 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15455 $tmp$$XMMRegister, true, false);
15456 %}
15457 ins_pipe(pipe_slow);
15458 %}
15459
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}

// Large non-constant length ClearArray for AVX512 targets.
// Note: $val (rax) is a live input holding the fill value, so the listings
// below do not show the stale "xorq rax, rax" prologue of the zero-only era.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Word-copy-only variant: as above, no byte-granular "rep stosb" path.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Small constant length ClearArray for AVX512 targets.
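// With the count known at compile time and AVX512VL available, no loop is
// needed at all: the idea (a sketch, not the literal stub code) is a short
// straight-line run of full-width vector stores plus one mask-guarded store
// for the remainder, which is why a kReg temp is reserved here.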
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// fast search of substring with known size.
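// The constant-count fast path (string_indexofC8) requires the pattern to
// fill at least one 128-bit pcmpestri chunk: 16 byte elements for LL, or
// 8 char elements for UU/UL. Shorter constant patterns take the generic
// string_indexof routine, which can load them through the stack when they
// would otherwise cross a page boundary.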
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe(pipe_slow);
%}

// fast search of substring with known size.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe(pipe_slow);
%}

// fast search of substring with known size.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe(pipe_slow);
%}

instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// fast string equals
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// fast array equals
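// StrEquals and AryEq share one masm routine; the leading boolean selects
// the array form, where the length operand is a temp that the routine fills
// from the arrays' own length fields rather than a caller-supplied count.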
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

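// Vectorized form of the canonical Java array hash
//   h = 0; for (int i = 0; i < n; i++) h = 31*h + a[i];
// Unrolled, this is the polynomial sum of a[i]*31^(n-1-i), which the stub
// can evaluate several lanes at a time against precomputed powers of 31
// (hence the large bank of vector temps below).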
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                       $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
                       $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// fast char[] to byte[] compression
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe(pipe_slow);
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe(pipe_slow);
%}

//----------Overflow Math Instructions-----------------------------------------

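// These instructions only set the condition codes; the matcher pairs them
// with a branch on the overflow (or carry) flag. For example, the
// Math.addExact(int, int) intrinsic reduces to the "addl" emitted by
// overflowAddI_rReg followed by a "jo" to the deoptimization path.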
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    __ negq($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// XXX more variants!!
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    __ testl($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    __ testl($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    __ testl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an rFlagsRegU instead of rFlagsReg.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
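    // testq sign-extends its 32-bit immediate, so 0xFFFFFFFF here really
    // tests all 64 bits, matching the 0xffffffffffffffff shown in the format.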
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

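// Compare against a narrow-oop constant. The constant is embedded as a
// relocated 32-bit immediate so the GC can update it if the object moves.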
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

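// When the compressed-oop base is null, r12 (the heap-base register) holds
// zero, so a narrow-oop null test can be a plain register compare against
// r12 instead of the longer immediate form above.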
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    __ testq($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Manifest a CmpU result in an integer register. Very painful.
// This is the test to avoid.
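// The pattern used by all three variants below: materialize -1, take an
// early exit if the "less"/"below" branch fires, otherwise setcc maps
// "equal" to 0 and "greater"/"above" to 1, giving the usual three-way
// compare result.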
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Manifest a CmpUL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned long compare Instructions; really, same as signed long except they
// produce an rFlagsRegU instead of rFlagsReg.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

//----------Max and Min--------------------------------------------------------
// Min Instructions
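// MinI/MaxI are expanded into a compare followed by a conditional move, so
// no branch is emitted. The cmovI_reg_* fragments below have no match rule
// of their own; they exist only as expansion targets.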

instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}

instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}

instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}

instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}

// ============================================================================
// Branch Instructions

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

17077 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17078 match(If cop cmp);
17079 effect(USE labl);
17080
17081 ins_cost(200);
17082 format %{ $$template
17083 if ($cop$$cmpcode == Assembler::notEqual) {
17084 $$emit$$"jp,u $labl\n\t"
17085 $$emit$$"j$cop,u $labl"
17086 } else {
17087 $$emit$$"jp,u done\n\t"
17088 $$emit$$"j$cop,u $labl\n\t"
17089 $$emit$$"done:"
17090 }
17091 %}
17092 ins_encode %{
17093 Label* l = $labl$$label;
17094 if ($cop$$cmpcode == Assembler::notEqual) {
17095 __ jcc(Assembler::parity, *l, false);
17096 __ jcc(Assembler::notEqual, *l, false);
17097 } else if ($cop$$cmpcode == Assembler::equal) {
17098 Label done;
17099 __ jccb(Assembler::parity, done);
17100 __ jcc(Assembler::equal, *l, false);
17101 __ bind(done);
17102 } else {
17103 ShouldNotReachHere();
17104 }
17105 %}
17106 ins_pipe(pipe_jcc);
17107 %}
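
// The parity checks above handle unordered floating-point compares:
// ucomiss/ucomisd set ZF, PF and CF when either input is NaN. A sketch of
// the "equal" case (illustration only):
//
//   // jp   done    -> unordered (NaN) can never compare equal
//   // je   target  -> taken only for an ordered, equal result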
17108
17109 // ============================================================================
17110 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
17111 // superklass array for an instance of the superklass. Set a hidden
17112 // internal cache on a hit (cache is checked with exposed code in
17113 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
17114 // encoding ALSO sets flags.
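//
// A rough C-level sketch of what the encoding below does (illustration
// only, not HotSpot code):
//
//   Klass** p = sub->secondary_supers()->data();    // rdi
//   int     n = sub->secondary_supers()->length();  // rcx
//   while (n-- > 0) {                                // repne scasq
//     if (*p++ == super) {                           // compare with rax
//       sub->set_secondary_super_cache(super);       // remember the hit
//       return 0;                                    // zero: hit
//     }
//   }
//   return 1;                                        // non-zero: miss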
17115
17116 instruct partialSubtypeCheck(rdi_RegP result,
17117 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17118 rFlagsReg cr)
17119 %{
17120 match(Set result (PartialSubtypeCheck sub super));
17121 predicate(!UseSecondarySupersTable);
17122 effect(KILL rcx, KILL cr);
17123
17124 ins_cost(1100); // slightly larger than the next version
17125 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17126 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17127 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17128 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17129 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
17130 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t# Hit: rdi zero\n\t"
17132 "miss:\t" %}
17133
17134 ins_encode %{
17135 Label miss;
17136 // NB: Callers may assume that, when $result is a valid register,
17137 // check_klass_subtype_slow_path_linear sets it to a nonzero
17138 // value.
17139 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17140 $rcx$$Register, $result$$Register,
17141 nullptr, &miss,
17142 /*set_cond_codes:*/ true);
17143 __ xorptr($result$$Register, $result$$Register);
17144 __ bind(miss);
17145 %}
17146
17147 ins_pipe(pipe_slow);
17148 %}
17149
17150 // ============================================================================
17151 // Two versions of hashtable-based partialSubtypeCheck, both used when
17152 // we need to search for a super class in the secondary supers array.
17153 // The first is used when we don't know _a priori_ the class being
17154 // searched for. The second, far more common, is used when we do know:
// this covers instanceof, checkcast, and any other case where C2 can
// determine the superclass by constant propagation.
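// For example, `x instanceof java.util.List` tests against a compile-time
// constant klass, so the *ConstSuper rule below applies and the klass's
// hash_slot() can be baked into the code (or reached through a per-slot
// stub); the *VarSuper rule is only needed when the superclass is itself a
// runtime value.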
17157
17158 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17159 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17160 rFlagsReg cr)
17161 %{
17162 match(Set result (PartialSubtypeCheck sub super));
17163 predicate(UseSecondarySupersTable);
17164 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17165
17166 ins_cost(1000);
17167 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17168
17169 ins_encode %{
17170 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17171 $temp3$$Register, $temp4$$Register, $result$$Register);
17172 %}
17173
17174 ins_pipe(pipe_slow);
17175 %}
17176
17177 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17178 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17179 rFlagsReg cr)
17180 %{
17181 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17182 predicate(UseSecondarySupersTable);
17183 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17184
17185 ins_cost(700); // smaller than the next version
17186 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17187
17188 ins_encode %{
17189 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17190 if (InlineSecondarySupersTest) {
17191 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17192 $temp3$$Register, $temp4$$Register, $result$$Register,
17193 super_klass_slot);
17194 } else {
17195 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17196 }
17197 %}
17198
17199 ins_pipe(pipe_slow);
17200 %}
17201
17202 // ============================================================================
17203 // Branch Instructions -- short offset versions
17204 //
17205 // These instructions are used to replace jumps of a long offset (the default
17206 // match) with jumps of a shorter offset. These instructions are all tagged
17207 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17208 // match rules in general matching. Instead, the ADLC generates a conversion
17209 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler decides whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
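//
// The space saving shows up in the size() attributes: an unconditional
// jmp rel32 is 5 bytes (E9 cd) versus 2 bytes for jmp rel8 (EB cb), and a
// jcc rel32 is 6 bytes (0F 8x cd) versus 2 bytes for jcc rel8 (7x cb). The
// short forms are only legal when the target lies within a signed 8-bit
// displacement of the next instruction.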
17213
17214 // Jump Direct - Label defines a relative address from JMP+1
17215 instruct jmpDir_short(label labl) %{
17216 match(Goto);
17217 effect(USE labl);
17218
17219 ins_cost(300);
17220 format %{ "jmp,s $labl" %}
17221 size(2);
17222 ins_encode %{
17223 Label* L = $labl$$label;
17224 __ jmpb(*L);
17225 %}
17226 ins_pipe(pipe_jmp);
17227 ins_short_branch(1);
17228 %}
17229
17230 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17231 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17232 match(If cop cr);
17233 effect(USE labl);
17234
17235 ins_cost(300);
17236 format %{ "j$cop,s $labl" %}
17237 size(2);
17238 ins_encode %{
17239 Label* L = $labl$$label;
17240 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17241 %}
17242 ins_pipe(pipe_jcc);
17243 ins_short_branch(1);
17244 %}
17245
17246 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17247 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17248 match(CountedLoopEnd cop cr);
17249 effect(USE labl);
17250
17251 ins_cost(300);
17252 format %{ "j$cop,s $labl\t# loop end" %}
17253 size(2);
17254 ins_encode %{
17255 Label* L = $labl$$label;
17256 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17257 %}
17258 ins_pipe(pipe_jcc);
17259 ins_short_branch(1);
17260 %}
17261
17262 // Jump Direct Conditional - using unsigned comparison
17263 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17264 match(If cop cmp);
17265 effect(USE labl);
17266
17267 ins_cost(300);
17268 format %{ "j$cop,us $labl" %}
17269 size(2);
17270 ins_encode %{
17271 Label* L = $labl$$label;
17272 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17273 %}
17274 ins_pipe(pipe_jcc);
17275 ins_short_branch(1);
17276 %}
17277
17278 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17279 match(If cop cmp);
17280 effect(USE labl);
17281
17282 ins_cost(300);
17283 format %{ "j$cop,us $labl" %}
17284 size(2);
17285 ins_encode %{
17286 Label* L = $labl$$label;
17287 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17288 %}
17289 ins_pipe(pipe_jcc);
17290 ins_short_branch(1);
17291 %}
17292
17293 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17294 match(If cop cmp);
17295 effect(USE labl);
17296
17297 ins_cost(300);
17298 format %{ $$template
17299 if ($cop$$cmpcode == Assembler::notEqual) {
17300 $$emit$$"jp,u,s $labl\n\t"
17301 $$emit$$"j$cop,u,s $labl"
17302 } else {
17303 $$emit$$"jp,u,s done\n\t"
17304 $$emit$$"j$cop,u,s $labl\n\t"
17305 $$emit$$"done:"
17306 }
17307 %}
17308 size(4);
17309 ins_encode %{
17310 Label* l = $labl$$label;
17311 if ($cop$$cmpcode == Assembler::notEqual) {
17312 __ jccb(Assembler::parity, *l);
17313 __ jccb(Assembler::notEqual, *l);
17314 } else if ($cop$$cmpcode == Assembler::equal) {
17315 Label done;
17316 __ jccb(Assembler::parity, done);
17317 __ jccb(Assembler::equal, *l);
17318 __ bind(done);
17319 } else {
17320 ShouldNotReachHere();
17321 }
17322 %}
17323 ins_pipe(pipe_jcc);
17324 ins_short_branch(1);
17325 %}
17326
17327 // ============================================================================
17328 // inlined locking and unlocking
17329
17330 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17331 match(Set cr (FastLock object box));
17332 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17333 ins_cost(300);
17334 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17335 ins_encode %{
17336 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17337 %}
17338 ins_pipe(pipe_slow);
17339 %}
17340
17341 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17342 match(Set cr (FastUnlock object rax_reg));
17343 effect(TEMP tmp, USE_KILL rax_reg);
17344 ins_cost(300);
17345 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17346 ins_encode %{
17347 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17348 %}
17349 ins_pipe(pipe_slow);
17350 %}
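
// Both rules communicate their outcome through the flags matched as cr:
// the fast_lock/fast_unlock macro assembler code is expected to set ZF on
// success, and a not-equal result sends control to the runtime slow path.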
17351
17352
17353 // ============================================================================
17354 // Safepoint Instructions
17355 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17356 %{
17357 match(SafePoint poll);
17358 effect(KILL cr, USE poll);
17359
17360 format %{ "testl rax, [$poll]\t"
17361 "# Safepoint: poll for GC" %}
17362 ins_cost(125);
17363 ins_encode %{
17364 __ relocate(relocInfo::poll_type);
17365 address pre_pc = __ pc();
17366 __ testl(rax, Address($poll$$Register, 0));
17367 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17368 %}
17369 ins_pipe(ialu_reg_mem);
17370 %}
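
// The poll operand carries the thread-local polling page address. The testl
// is a cheap load in the common case; to stop a thread the VM arms the page
// so the load faults, and the signal handler recognizes the instruction by
// the poll_type relocation recorded above.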
17371
17372 instruct mask_all_evexL(kReg dst, rRegL src) %{
17373 match(Set dst (MaskAll src));
17374 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17375 ins_encode %{
17376 int mask_len = Matcher::vector_length(this);
17377 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17378 %}
17379 ins_pipe( pipe_slow );
17380 %}
17381
17382 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17383 predicate(Matcher::vector_length(n) > 32);
17384 match(Set dst (MaskAll src));
17385 effect(TEMP tmp);
17386 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17387 ins_encode %{
17388 int mask_len = Matcher::vector_length(this);
17389 __ movslq($tmp$$Register, $src$$Register);
17390 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17391 %}
17392 ins_pipe( pipe_slow );
17393 %}
17394
17395 // ============================================================================
17396 // Procedure Call/Return Instructions
17397 // Call Java Static Instruction
17398 // Note: If this code changes, the corresponding ret_addr_offset() and
17399 // compute_padding() functions will have to be adjusted.
17400 instruct CallStaticJavaDirect(method meth) %{
17401 match(CallStaticJava);
17402 effect(USE meth);
17403
17404 ins_cost(300);
17405 format %{ "call,static " %}
17406 opcode(0xE8); /* E8 cd */
17407 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17408 ins_pipe(pipe_slow);
17409 ins_alignment(4);
17410 %}
17411
17412 // Call Java Dynamic Instruction
17413 // Note: If this code changes, the corresponding ret_addr_offset() and
17414 // compute_padding() functions will have to be adjusted.
17415 instruct CallDynamicJavaDirect(method meth)
17416 %{
17417 match(CallDynamicJava);
17418 effect(USE meth);
17419
17420 ins_cost(300);
17421 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17422 "call,dynamic " %}
17423 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17424 ins_pipe(pipe_slow);
17425 ins_alignment(4);
17426 %}
17427
17428 // Call Runtime Instruction
17429 instruct CallRuntimeDirect(method meth)
17430 %{
17431 match(CallRuntime);
17432 effect(USE meth);
17433
17434 ins_cost(300);
17435 format %{ "call,runtime " %}
17436 ins_encode(clear_avx, Java_To_Runtime(meth));
17437 ins_pipe(pipe_slow);
17438 %}
17439
17440 // Call runtime without safepoint
17441 instruct CallLeafDirect(method meth)
17442 %{
17443 match(CallLeaf);
17444 effect(USE meth);
17445
17446 ins_cost(300);
17447 format %{ "call_leaf,runtime " %}
17448 ins_encode(clear_avx, Java_To_Runtime(meth));
17449 ins_pipe(pipe_slow);
17450 %}
17451
17452 // Call runtime without safepoint and with vector arguments
17453 instruct CallLeafDirectVector(method meth)
17454 %{
17455 match(CallLeafVector);
17456 effect(USE meth);
17457
17458 ins_cost(300);
17459 format %{ "call_leaf,vector " %}
17460 ins_encode(Java_To_Runtime(meth));
17461 ins_pipe(pipe_slow);
17462 %}
17463
17464 // Call runtime without safepoint
17465 // entry point is null, target holds the address to call
17466 instruct CallLeafNoFPInDirect(rRegP target)
17467 %{
17468 predicate(n->as_Call()->entry_point() == nullptr);
17469 match(CallLeafNoFP target);
17470
17471 ins_cost(300);
17472 format %{ "call_leaf_nofp,runtime indirect " %}
17473 ins_encode %{
17474 __ call($target$$Register);
17475 %}
17476
17477 ins_pipe(pipe_slow);
17478 %}
17479
17480 // Call runtime without safepoint
17481 instruct CallLeafNoFPDirect(method meth)
17482 %{
17483 predicate(n->as_Call()->entry_point() != nullptr);
17484 match(CallLeafNoFP);
17485 effect(USE meth);
17486
17487 ins_cost(300);
17488 format %{ "call_leaf_nofp,runtime " %}
17489 ins_encode(clear_avx, Java_To_Runtime(meth));
17490 ins_pipe(pipe_slow);
17491 %}
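
// The clear_avx prefix on the encodings above emits vzeroupper when AVX is
// enabled, so dirty upper YMM/ZMM state does not leak into runtime code and
// trigger AVX/SSE transition penalties.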
17492
17493 // Return Instruction
17494 // Remove the return address & jump to it.
17495 // Notice: We always emit a nop after a ret to make sure there is room
17496 // for safepoint patching
17497 instruct Ret()
17498 %{
17499 match(Return);
17500
17501 format %{ "ret" %}
17502 ins_encode %{
17503 __ ret(0);
17504 %}
17505 ins_pipe(pipe_jmp);
17506 %}
17507
17508 // Tail Call; Jump from runtime stub to Java code.
17509 // Also known as an 'interprocedural jump'.
17510 // Target of jump will eventually return to caller.
17511 // TailJump below removes the return address.
17512 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17513 // emitted just above the TailCall which has reset rbp to the caller state.
17514 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17515 %{
17516 match(TailCall jump_target method_ptr);
17517
17518 ins_cost(300);
17519 format %{ "jmp $jump_target\t# rbx holds method" %}
17520 ins_encode %{
17521 __ jmp($jump_target$$Register);
17522 %}
17523 ins_pipe(pipe_jmp);
17524 %}
17525
17526 // Tail Jump; remove the return address; jump to target.
17527 // TailCall above leaves the return address around.
17528 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17529 %{
17530 match(TailJump jump_target ex_oop);
17531
17532 ins_cost(300);
17533 format %{ "popq rdx\t# pop return address\n\t"
17534 "jmp $jump_target" %}
17535 ins_encode %{
17536 __ popq(as_Register(RDX_enc));
17537 __ jmp($jump_target$$Register);
17538 %}
17539 ins_pipe(pipe_jmp);
17540 %}
17541
17542 // Forward exception.
17543 instruct ForwardExceptionjmp()
17544 %{
17545 match(ForwardException);
17546
17547 format %{ "jmp forward_exception_stub" %}
17548 ins_encode %{
17549 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17550 %}
17551 ins_pipe(pipe_jmp);
17552 %}
17553
17554 // Create exception oop: created by stack-crawling runtime code.
17555 // Created exception is now available to this handler, and is setup
17556 // just prior to jumping to this handler. No code emitted.
17557 instruct CreateException(rax_RegP ex_oop)
17558 %{
17559 match(Set ex_oop (CreateEx));
17560
17561 size(0);
17562 // use the following format syntax
17563 format %{ "# exception oop is in rax; no code emitted" %}
17564 ins_encode();
17565 ins_pipe(empty);
17566 %}
17567
17568 // Rethrow exception:
17569 // The exception oop will come in the first argument position.
17570 // Then JUMP (not call) to the rethrow stub code.
17571 instruct RethrowException()
17572 %{
17573 match(Rethrow);
17574
17575 // use the following format syntax
17576 format %{ "jmp rethrow_stub" %}
17577 ins_encode %{
17578 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17579 %}
17580 ins_pipe(pipe_jmp);
17581 %}
17582
17583 // ============================================================================
17584 // This name is KNOWN by the ADLC and cannot be changed.
17585 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17586 // for this guy.
17587 instruct tlsLoadP(r15_RegP dst) %{
17588 match(Set dst (ThreadLocal));
17589 effect(DEF dst);
17590
17591 size(0);
17592 format %{ "# TLS is in R15" %}
17593 ins_encode( /*empty encoding*/ );
17594 ins_pipe(ialu_reg_reg);
17595 %}
17596
17597 instruct addF_reg(regF dst, regF src) %{
17598 predicate(UseAVX == 0);
17599 match(Set dst (AddF dst src));
17600
17601 format %{ "addss $dst, $src" %}
17602 ins_cost(150);
17603 ins_encode %{
17604 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17605 %}
17606 ins_pipe(pipe_slow);
17607 %}
17608
17609 instruct addF_mem(regF dst, memory src) %{
17610 predicate(UseAVX == 0);
17611 match(Set dst (AddF dst (LoadF src)));
17612
17613 format %{ "addss $dst, $src" %}
17614 ins_cost(150);
17615 ins_encode %{
17616 __ addss($dst$$XMMRegister, $src$$Address);
17617 %}
17618 ins_pipe(pipe_slow);
17619 %}
17620
17621 instruct addF_imm(regF dst, immF con) %{
17622 predicate(UseAVX == 0);
17623 match(Set dst (AddF dst con));
17624 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17625 ins_cost(150);
17626 ins_encode %{
17627 __ addss($dst$$XMMRegister, $constantaddress($con));
17628 %}
17629 ins_pipe(pipe_slow);
17630 %}
17631
17632 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17633 predicate(UseAVX > 0);
17634 match(Set dst (AddF src1 src2));
17635
17636 format %{ "vaddss $dst, $src1, $src2" %}
17637 ins_cost(150);
17638 ins_encode %{
17639 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17640 %}
17641 ins_pipe(pipe_slow);
17642 %}
17643
17644 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17645 predicate(UseAVX > 0);
17646 match(Set dst (AddF src1 (LoadF src2)));
17647
17648 format %{ "vaddss $dst, $src1, $src2" %}
17649 ins_cost(150);
17650 ins_encode %{
17651 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17652 %}
17653 ins_pipe(pipe_slow);
17654 %}
17655
17656 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17657 predicate(UseAVX > 0);
17658 match(Set dst (AddF src con));
17659
17660 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17661 ins_cost(150);
17662 ins_encode %{
17663 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17664 %}
17665 ins_pipe(pipe_slow);
17666 %}
17667
17668 instruct addD_reg(regD dst, regD src) %{
17669 predicate(UseAVX == 0);
17670 match(Set dst (AddD dst src));
17671
17672 format %{ "addsd $dst, $src" %}
17673 ins_cost(150);
17674 ins_encode %{
17675 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17676 %}
17677 ins_pipe(pipe_slow);
17678 %}
17679
17680 instruct addD_mem(regD dst, memory src) %{
17681 predicate(UseAVX == 0);
17682 match(Set dst (AddD dst (LoadD src)));
17683
17684 format %{ "addsd $dst, $src" %}
17685 ins_cost(150);
17686 ins_encode %{
17687 __ addsd($dst$$XMMRegister, $src$$Address);
17688 %}
17689 ins_pipe(pipe_slow);
17690 %}
17691
17692 instruct addD_imm(regD dst, immD con) %{
17693 predicate(UseAVX == 0);
17694 match(Set dst (AddD dst con));
17695 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17696 ins_cost(150);
17697 ins_encode %{
17698 __ addsd($dst$$XMMRegister, $constantaddress($con));
17699 %}
17700 ins_pipe(pipe_slow);
17701 %}
17702
17703 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17704 predicate(UseAVX > 0);
17705 match(Set dst (AddD src1 src2));
17706
17707 format %{ "vaddsd $dst, $src1, $src2" %}
17708 ins_cost(150);
17709 ins_encode %{
17710 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17711 %}
17712 ins_pipe(pipe_slow);
17713 %}
17714
17715 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17716 predicate(UseAVX > 0);
17717 match(Set dst (AddD src1 (LoadD src2)));
17718
17719 format %{ "vaddsd $dst, $src1, $src2" %}
17720 ins_cost(150);
17721 ins_encode %{
17722 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17723 %}
17724 ins_pipe(pipe_slow);
17725 %}
17726
17727 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17728 predicate(UseAVX > 0);
17729 match(Set dst (AddD src con));
17730
17731 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17732 ins_cost(150);
17733 ins_encode %{
17734 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17735 %}
17736 ins_pipe(pipe_slow);
17737 %}
17738
17739 instruct subF_reg(regF dst, regF src) %{
17740 predicate(UseAVX == 0);
17741 match(Set dst (SubF dst src));
17742
17743 format %{ "subss $dst, $src" %}
17744 ins_cost(150);
17745 ins_encode %{
17746 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17747 %}
17748 ins_pipe(pipe_slow);
17749 %}
17750
17751 instruct subF_mem(regF dst, memory src) %{
17752 predicate(UseAVX == 0);
17753 match(Set dst (SubF dst (LoadF src)));
17754
17755 format %{ "subss $dst, $src" %}
17756 ins_cost(150);
17757 ins_encode %{
17758 __ subss($dst$$XMMRegister, $src$$Address);
17759 %}
17760 ins_pipe(pipe_slow);
17761 %}
17762
17763 instruct subF_imm(regF dst, immF con) %{
17764 predicate(UseAVX == 0);
17765 match(Set dst (SubF dst con));
17766 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17767 ins_cost(150);
17768 ins_encode %{
17769 __ subss($dst$$XMMRegister, $constantaddress($con));
17770 %}
17771 ins_pipe(pipe_slow);
17772 %}
17773
17774 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17775 predicate(UseAVX > 0);
17776 match(Set dst (SubF src1 src2));
17777
17778 format %{ "vsubss $dst, $src1, $src2" %}
17779 ins_cost(150);
17780 ins_encode %{
17781 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17782 %}
17783 ins_pipe(pipe_slow);
17784 %}
17785
17786 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17787 predicate(UseAVX > 0);
17788 match(Set dst (SubF src1 (LoadF src2)));
17789
17790 format %{ "vsubss $dst, $src1, $src2" %}
17791 ins_cost(150);
17792 ins_encode %{
17793 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17794 %}
17795 ins_pipe(pipe_slow);
17796 %}
17797
17798 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17799 predicate(UseAVX > 0);
17800 match(Set dst (SubF src con));
17801
17802 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17803 ins_cost(150);
17804 ins_encode %{
17805 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17806 %}
17807 ins_pipe(pipe_slow);
17808 %}
17809
17810 instruct subD_reg(regD dst, regD src) %{
17811 predicate(UseAVX == 0);
17812 match(Set dst (SubD dst src));
17813
17814 format %{ "subsd $dst, $src" %}
17815 ins_cost(150);
17816 ins_encode %{
17817 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17818 %}
17819 ins_pipe(pipe_slow);
17820 %}
17821
17822 instruct subD_mem(regD dst, memory src) %{
17823 predicate(UseAVX == 0);
17824 match(Set dst (SubD dst (LoadD src)));
17825
17826 format %{ "subsd $dst, $src" %}
17827 ins_cost(150);
17828 ins_encode %{
17829 __ subsd($dst$$XMMRegister, $src$$Address);
17830 %}
17831 ins_pipe(pipe_slow);
17832 %}
17833
17834 instruct subD_imm(regD dst, immD con) %{
17835 predicate(UseAVX == 0);
17836 match(Set dst (SubD dst con));
17837 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17838 ins_cost(150);
17839 ins_encode %{
17840 __ subsd($dst$$XMMRegister, $constantaddress($con));
17841 %}
17842 ins_pipe(pipe_slow);
17843 %}
17844
17845 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17846 predicate(UseAVX > 0);
17847 match(Set dst (SubD src1 src2));
17848
17849 format %{ "vsubsd $dst, $src1, $src2" %}
17850 ins_cost(150);
17851 ins_encode %{
17852 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17853 %}
17854 ins_pipe(pipe_slow);
17855 %}
17856
17857 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17858 predicate(UseAVX > 0);
17859 match(Set dst (SubD src1 (LoadD src2)));
17860
17861 format %{ "vsubsd $dst, $src1, $src2" %}
17862 ins_cost(150);
17863 ins_encode %{
17864 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17865 %}
17866 ins_pipe(pipe_slow);
17867 %}
17868
17869 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17870 predicate(UseAVX > 0);
17871 match(Set dst (SubD src con));
17872
17873 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17874 ins_cost(150);
17875 ins_encode %{
17876 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17877 %}
17878 ins_pipe(pipe_slow);
17879 %}
17880
17881 instruct mulF_reg(regF dst, regF src) %{
17882 predicate(UseAVX == 0);
17883 match(Set dst (MulF dst src));
17884
17885 format %{ "mulss $dst, $src" %}
17886 ins_cost(150);
17887 ins_encode %{
17888 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17889 %}
17890 ins_pipe(pipe_slow);
17891 %}
17892
17893 instruct mulF_mem(regF dst, memory src) %{
17894 predicate(UseAVX == 0);
17895 match(Set dst (MulF dst (LoadF src)));
17896
17897 format %{ "mulss $dst, $src" %}
17898 ins_cost(150);
17899 ins_encode %{
17900 __ mulss($dst$$XMMRegister, $src$$Address);
17901 %}
17902 ins_pipe(pipe_slow);
17903 %}
17904
17905 instruct mulF_imm(regF dst, immF con) %{
17906 predicate(UseAVX == 0);
17907 match(Set dst (MulF dst con));
17908 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17909 ins_cost(150);
17910 ins_encode %{
17911 __ mulss($dst$$XMMRegister, $constantaddress($con));
17912 %}
17913 ins_pipe(pipe_slow);
17914 %}
17915
17916 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17917 predicate(UseAVX > 0);
17918 match(Set dst (MulF src1 src2));
17919
17920 format %{ "vmulss $dst, $src1, $src2" %}
17921 ins_cost(150);
17922 ins_encode %{
17923 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17924 %}
17925 ins_pipe(pipe_slow);
17926 %}
17927
17928 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17929 predicate(UseAVX > 0);
17930 match(Set dst (MulF src1 (LoadF src2)));
17931
17932 format %{ "vmulss $dst, $src1, $src2" %}
17933 ins_cost(150);
17934 ins_encode %{
17935 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17936 %}
17937 ins_pipe(pipe_slow);
17938 %}
17939
17940 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17941 predicate(UseAVX > 0);
17942 match(Set dst (MulF src con));
17943
17944 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17945 ins_cost(150);
17946 ins_encode %{
17947 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17948 %}
17949 ins_pipe(pipe_slow);
17950 %}
17951
17952 instruct mulD_reg(regD dst, regD src) %{
17953 predicate(UseAVX == 0);
17954 match(Set dst (MulD dst src));
17955
17956 format %{ "mulsd $dst, $src" %}
17957 ins_cost(150);
17958 ins_encode %{
17959 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17960 %}
17961 ins_pipe(pipe_slow);
17962 %}
17963
17964 instruct mulD_mem(regD dst, memory src) %{
17965 predicate(UseAVX == 0);
17966 match(Set dst (MulD dst (LoadD src)));
17967
17968 format %{ "mulsd $dst, $src" %}
17969 ins_cost(150);
17970 ins_encode %{
17971 __ mulsd($dst$$XMMRegister, $src$$Address);
17972 %}
17973 ins_pipe(pipe_slow);
17974 %}
17975
17976 instruct mulD_imm(regD dst, immD con) %{
17977 predicate(UseAVX == 0);
17978 match(Set dst (MulD dst con));
17979 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17980 ins_cost(150);
17981 ins_encode %{
17982 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17983 %}
17984 ins_pipe(pipe_slow);
17985 %}
17986
17987 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17988 predicate(UseAVX > 0);
17989 match(Set dst (MulD src1 src2));
17990
17991 format %{ "vmulsd $dst, $src1, $src2" %}
17992 ins_cost(150);
17993 ins_encode %{
17994 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17995 %}
17996 ins_pipe(pipe_slow);
17997 %}
17998
17999 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18000 predicate(UseAVX > 0);
18001 match(Set dst (MulD src1 (LoadD src2)));
18002
18003 format %{ "vmulsd $dst, $src1, $src2" %}
18004 ins_cost(150);
18005 ins_encode %{
18006 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18007 %}
18008 ins_pipe(pipe_slow);
18009 %}
18010
18011 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18012 predicate(UseAVX > 0);
18013 match(Set dst (MulD src con));
18014
18015 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18016 ins_cost(150);
18017 ins_encode %{
18018 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18019 %}
18020 ins_pipe(pipe_slow);
18021 %}
18022
18023 instruct divF_reg(regF dst, regF src) %{
18024 predicate(UseAVX == 0);
18025 match(Set dst (DivF dst src));
18026
18027 format %{ "divss $dst, $src" %}
18028 ins_cost(150);
18029 ins_encode %{
18030 __ divss($dst$$XMMRegister, $src$$XMMRegister);
18031 %}
18032 ins_pipe(pipe_slow);
18033 %}
18034
18035 instruct divF_mem(regF dst, memory src) %{
18036 predicate(UseAVX == 0);
18037 match(Set dst (DivF dst (LoadF src)));
18038
18039 format %{ "divss $dst, $src" %}
18040 ins_cost(150);
18041 ins_encode %{
18042 __ divss($dst$$XMMRegister, $src$$Address);
18043 %}
18044 ins_pipe(pipe_slow);
18045 %}
18046
18047 instruct divF_imm(regF dst, immF con) %{
18048 predicate(UseAVX == 0);
18049 match(Set dst (DivF dst con));
18050 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18051 ins_cost(150);
18052 ins_encode %{
18053 __ divss($dst$$XMMRegister, $constantaddress($con));
18054 %}
18055 ins_pipe(pipe_slow);
18056 %}
18057
18058 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18059 predicate(UseAVX > 0);
18060 match(Set dst (DivF src1 src2));
18061
18062 format %{ "vdivss $dst, $src1, $src2" %}
18063 ins_cost(150);
18064 ins_encode %{
18065 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18066 %}
18067 ins_pipe(pipe_slow);
18068 %}
18069
18070 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18071 predicate(UseAVX > 0);
18072 match(Set dst (DivF src1 (LoadF src2)));
18073
18074 format %{ "vdivss $dst, $src1, $src2" %}
18075 ins_cost(150);
18076 ins_encode %{
18077 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18078 %}
18079 ins_pipe(pipe_slow);
18080 %}
18081
18082 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18083 predicate(UseAVX > 0);
18084 match(Set dst (DivF src con));
18085
18086 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18087 ins_cost(150);
18088 ins_encode %{
18089 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18090 %}
18091 ins_pipe(pipe_slow);
18092 %}
18093
18094 instruct divD_reg(regD dst, regD src) %{
18095 predicate(UseAVX == 0);
18096 match(Set dst (DivD dst src));
18097
18098 format %{ "divsd $dst, $src" %}
18099 ins_cost(150);
18100 ins_encode %{
18101 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18102 %}
18103 ins_pipe(pipe_slow);
18104 %}
18105
18106 instruct divD_mem(regD dst, memory src) %{
18107 predicate(UseAVX == 0);
18108 match(Set dst (DivD dst (LoadD src)));
18109
18110 format %{ "divsd $dst, $src" %}
18111 ins_cost(150);
18112 ins_encode %{
18113 __ divsd($dst$$XMMRegister, $src$$Address);
18114 %}
18115 ins_pipe(pipe_slow);
18116 %}
18117
18118 instruct divD_imm(regD dst, immD con) %{
18119 predicate(UseAVX == 0);
18120 match(Set dst (DivD dst con));
18121 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18122 ins_cost(150);
18123 ins_encode %{
18124 __ divsd($dst$$XMMRegister, $constantaddress($con));
18125 %}
18126 ins_pipe(pipe_slow);
18127 %}
18128
18129 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18130 predicate(UseAVX > 0);
18131 match(Set dst (DivD src1 src2));
18132
18133 format %{ "vdivsd $dst, $src1, $src2" %}
18134 ins_cost(150);
18135 ins_encode %{
18136 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18137 %}
18138 ins_pipe(pipe_slow);
18139 %}
18140
18141 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18142 predicate(UseAVX > 0);
18143 match(Set dst (DivD src1 (LoadD src2)));
18144
18145 format %{ "vdivsd $dst, $src1, $src2" %}
18146 ins_cost(150);
18147 ins_encode %{
18148 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18149 %}
18150 ins_pipe(pipe_slow);
18151 %}
18152
18153 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18154 predicate(UseAVX > 0);
18155 match(Set dst (DivD src con));
18156
18157 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18158 ins_cost(150);
18159 ins_encode %{
18160 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18161 %}
18162 ins_pipe(pipe_slow);
18163 %}
18164
18165 instruct absF_reg(regF dst) %{
18166 predicate(UseAVX == 0);
18167 match(Set dst (AbsF dst));
18168 ins_cost(150);
18169 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18170 ins_encode %{
18171 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18172 %}
18173 ins_pipe(pipe_slow);
18174 %}
18175
18176 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18177 predicate(UseAVX > 0);
18178 match(Set dst (AbsF src));
18179 ins_cost(150);
18180 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18181 ins_encode %{
18182 int vlen_enc = Assembler::AVX_128bit;
18183 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18184 ExternalAddress(float_signmask()), vlen_enc);
18185 %}
18186 ins_pipe(pipe_slow);
18187 %}
18188
18189 instruct absD_reg(regD dst) %{
18190 predicate(UseAVX == 0);
18191 match(Set dst (AbsD dst));
18192 ins_cost(150);
18193 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18194 "# abs double by sign masking" %}
18195 ins_encode %{
18196 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18197 %}
18198 ins_pipe(pipe_slow);
18199 %}
18200
18201 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18202 predicate(UseAVX > 0);
18203 match(Set dst (AbsD src));
18204 ins_cost(150);
18205 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18206 "# abs double by sign masking" %}
18207 ins_encode %{
18208 int vlen_enc = Assembler::AVX_128bit;
18209 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18210 ExternalAddress(double_signmask()), vlen_enc);
18211 %}
18212 ins_pipe(pipe_slow);
18213 %}
18214
18215 instruct negF_reg(regF dst) %{
18216 predicate(UseAVX == 0);
18217 match(Set dst (NegF dst));
18218 ins_cost(150);
18219 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18220 ins_encode %{
18221 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18222 %}
18223 ins_pipe(pipe_slow);
18224 %}
18225
18226 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18227 predicate(UseAVX > 0);
18228 match(Set dst (NegF src));
18229 ins_cost(150);
18230 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18231 ins_encode %{
18232 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18233 ExternalAddress(float_signflip()));
18234 %}
18235 ins_pipe(pipe_slow);
18236 %}
18237
18238 instruct negD_reg(regD dst) %{
18239 predicate(UseAVX == 0);
18240 match(Set dst (NegD dst));
18241 ins_cost(150);
18242 format %{ "xorpd $dst, [0x8000000000000000]\t"
18243 "# neg double by sign flipping" %}
18244 ins_encode %{
18245 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18246 %}
18247 ins_pipe(pipe_slow);
18248 %}
18249
18250 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18251 predicate(UseAVX > 0);
18252 match(Set dst (NegD src));
18253 ins_cost(150);
18254 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18255 "# neg double by sign flipping" %}
18256 ins_encode %{
18257 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18258 ExternalAddress(double_signflip()));
18259 %}
18260 ins_pipe(pipe_slow);
18261 %}
18262
// The sqrtss instruction wants its destination register pre-initialized for
// best performance, so only the rule where the input is already in dst is
// defined below.
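// (The pre-initialization requirement is about false dependencies: sqrtss
// writes only the low 32 bits of dst and leaves the rest untouched, so a
// cold dst would make the instruction wait on whatever last wrote that
// register. Matching only the dst == src form keeps the dependency real.)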
18265 instruct sqrtF_reg(regF dst) %{
18266 match(Set dst (SqrtF dst));
18267 format %{ "sqrtss $dst, $dst" %}
18268 ins_encode %{
18269 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18270 %}
18271 ins_pipe(pipe_slow);
18272 %}
18273
// The sqrtsd instruction wants its destination register pre-initialized for
// best performance (same false-dependency reasoning as sqrtss above), so
// only the rule where the input is already in dst is defined below.
18276 instruct sqrtD_reg(regD dst) %{
18277 match(Set dst (SqrtD dst));
18278 format %{ "sqrtsd $dst, $dst" %}
18279 ins_encode %{
18280 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18281 %}
18282 ins_pipe(pipe_slow);
18283 %}
18284
18285 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18286 effect(TEMP tmp);
18287 match(Set dst (ConvF2HF src));
18288 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
18290 ins_encode %{
18291 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18292 %}
18293 ins_pipe( pipe_slow );
18294 %}
18295
18296 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18297 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18298 effect(TEMP ktmp, TEMP rtmp);
18299 match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
18301 ins_encode %{
18302 __ movl($rtmp$$Register, 0x1);
18303 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18304 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18305 %}
18306 ins_pipe( pipe_slow );
18307 %}
18308
18309 instruct vconvF2HF(vec dst, vec src) %{
18310 match(Set dst (VectorCastF2HF src));
18311 format %{ "vector_conv_F2HF $dst $src" %}
18312 ins_encode %{
18313 int vlen_enc = vector_length_encoding(this, $src);
18314 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18315 %}
18316 ins_pipe( pipe_slow );
18317 %}
18318
18319 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18320 predicate(n->as_StoreVector()->memory_size() >= 16);
18321 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18322 format %{ "vcvtps2ph $mem,$src" %}
18323 ins_encode %{
18324 int vlen_enc = vector_length_encoding(this, $src);
18325 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18326 %}
18327 ins_pipe( pipe_slow );
18328 %}
18329
18330 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18331 match(Set dst (ConvHF2F src));
18332 format %{ "vcvtph2ps $dst,$src" %}
18333 ins_encode %{
18334 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18335 %}
18336 ins_pipe( pipe_slow );
18337 %}
18338
18339 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18340 match(Set dst (VectorCastHF2F (LoadVector mem)));
18341 format %{ "vcvtph2ps $dst,$mem" %}
18342 ins_encode %{
18343 int vlen_enc = vector_length_encoding(this);
18344 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18345 %}
18346 ins_pipe( pipe_slow );
18347 %}
18348
18349 instruct vconvHF2F(vec dst, vec src) %{
18350 match(Set dst (VectorCastHF2F src));
18351 ins_cost(125);
18352 format %{ "vector_conv_HF2F $dst,$src" %}
18353 ins_encode %{
18354 int vlen_enc = vector_length_encoding(this);
18355 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18356 %}
18357 ins_pipe( pipe_slow );
18358 %}
18359
18360 // ---------------------------------------- VectorReinterpret ------------------------------------
18361 instruct reinterpret_mask(kReg dst) %{
18362 predicate(n->bottom_type()->isa_vectmask() &&
18363 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18364 match(Set dst (VectorReinterpret dst));
18365 ins_cost(125);
18366 format %{ "vector_reinterpret $dst\t!" %}
18367 ins_encode %{
18368 // empty
18369 %}
18370 ins_pipe( pipe_slow );
18371 %}
18372
18373 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18374 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18375 n->bottom_type()->isa_vectmask() &&
18376 n->in(1)->bottom_type()->isa_vectmask() &&
18377 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18378 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18379 match(Set dst (VectorReinterpret src));
18380 effect(TEMP xtmp);
18381 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18382 ins_encode %{
18383 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18384 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18386 int vlen_enc = vector_length_encoding(src_sz);
18387 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18388 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18389 %}
18390 ins_pipe( pipe_slow );
18391 %}
18392
18393 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18394 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18395 n->bottom_type()->isa_vectmask() &&
18396 n->in(1)->bottom_type()->isa_vectmask() &&
18397 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18398 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18399 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18400 match(Set dst (VectorReinterpret src));
18401 effect(TEMP xtmp);
18402 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18403 ins_encode %{
18404 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18405 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18407 int vlen_enc = vector_length_encoding(src_sz);
18408 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18409 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18410 %}
18411 ins_pipe( pipe_slow );
18412 %}
18413
18414 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18415 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18416 n->bottom_type()->isa_vectmask() &&
18417 n->in(1)->bottom_type()->isa_vectmask() &&
18418 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18419 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18420 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18421 match(Set dst (VectorReinterpret src));
18422 effect(TEMP xtmp);
18423 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18424 ins_encode %{
18425 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18426 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18428 int vlen_enc = vector_length_encoding(src_sz);
18429 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18430 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18431 %}
18432 ins_pipe( pipe_slow );
18433 %}
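
// The three W2B/D2B/Q2B rules above share one trick: expand the opmask into
// a vector of all-ones lanes at the source element width (evpmovm2w/d/q),
// then recompress at byte granularity (evpmovb2m), so each source mask bit
// fans out into element-size consecutive byte-mask bits.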
18434
18435 instruct reinterpret(vec dst) %{
18436 predicate(!n->bottom_type()->isa_vectmask() &&
18437 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18438 match(Set dst (VectorReinterpret dst));
18439 ins_cost(125);
18440 format %{ "vector_reinterpret $dst\t!" %}
18441 ins_encode %{
18442 // empty
18443 %}
18444 ins_pipe( pipe_slow );
18445 %}
18446
18447 instruct reinterpret_expand(vec dst, vec src) %{
18448 predicate(UseAVX == 0 &&
18449 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18450 match(Set dst (VectorReinterpret src));
18451 ins_cost(125);
18452 effect(TEMP dst);
18453 format %{ "vector_reinterpret_expand $dst,$src" %}
18454 ins_encode %{
18455 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18456 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18457
18458 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18459 if (src_vlen_in_bytes == 4) {
18460 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18461 } else {
18462 assert(src_vlen_in_bytes == 8, "");
18463 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18464 }
18465 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18466 %}
18467 ins_pipe( pipe_slow );
18468 %}
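
// Expanding to a wider vector must zero everything above the source bytes.
// The SSE-only rule above achieves that by loading a low-32/64-bit all-ones
// mask into dst and AND-ing it with src: reg-to-reg scalar moves such as
// movss merge into dst rather than zeroing its upper bits.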
18469
18470 instruct vreinterpret_expand4(legVec dst, vec src) %{
18471 predicate(UseAVX > 0 &&
18472 !n->bottom_type()->isa_vectmask() &&
18473 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18474 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18475 match(Set dst (VectorReinterpret src));
18476 ins_cost(125);
18477 format %{ "vector_reinterpret_expand $dst,$src" %}
18478 ins_encode %{
18479 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18480 %}
18481 ins_pipe( pipe_slow );
18482 %}
18483
18484
18485 instruct vreinterpret_expand(legVec dst, vec src) %{
18486 predicate(UseAVX > 0 &&
18487 !n->bottom_type()->isa_vectmask() &&
18488 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18489 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18490 match(Set dst (VectorReinterpret src));
18491 ins_cost(125);
18492 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18493 ins_encode %{
18494 switch (Matcher::vector_length_in_bytes(this, $src)) {
18495 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18496 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18497 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18498 default: ShouldNotReachHere();
18499 }
18500 %}
18501 ins_pipe( pipe_slow );
18502 %}
18503
18504 instruct reinterpret_shrink(vec dst, legVec src) %{
18505 predicate(!n->bottom_type()->isa_vectmask() &&
18506 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18507 match(Set dst (VectorReinterpret src));
18508 ins_cost(125);
18509 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18510 ins_encode %{
18511 switch (Matcher::vector_length_in_bytes(this)) {
18512 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18513 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18514 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18515 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18516 default: ShouldNotReachHere();
18517 }
18518 %}
18519 ins_pipe( pipe_slow );
18520 %}
18521
18522 // ----------------------------------------------------------------------------------------------------
18523
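// RoundDoubleMode's rmode immediate feeds the SSE4.1 roundsd/vroundpd
// encodings directly; assuming HotSpot's usual mapping, 0 rounds to nearest
// even (Math.rint), 1 rounds toward -infinity (Math.floor) and 2 rounds
// toward +infinity (Math.ceil).
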
18524 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18525 match(Set dst (RoundDoubleMode src rmode));
18526 format %{ "roundsd $dst,$src" %}
18527 ins_cost(150);
18528 ins_encode %{
18529 assert(UseSSE >= 4, "required");
18530 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18531 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18532 }
18533 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18534 %}
18535 ins_pipe(pipe_slow);
18536 %}
18537
18538 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18539 match(Set dst (RoundDoubleMode con rmode));
18540 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18541 ins_cost(150);
18542 ins_encode %{
18543 assert(UseSSE >= 4, "required");
18544 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18545 %}
18546 ins_pipe(pipe_slow);
18547 %}
18548
18549 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18550 predicate(Matcher::vector_length(n) < 8);
18551 match(Set dst (RoundDoubleModeV src rmode));
18552 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18553 ins_encode %{
18554 assert(UseAVX > 0, "required");
18555 int vlen_enc = vector_length_encoding(this);
18556 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18557 %}
18558 ins_pipe( pipe_slow );
18559 %}
18560
18561 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18562 predicate(Matcher::vector_length(n) == 8);
18563 match(Set dst (RoundDoubleModeV src rmode));
18564 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18565 ins_encode %{
18566 assert(UseAVX > 2, "required");
18567 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18568 %}
18569 ins_pipe( pipe_slow );
18570 %}
18571
18572 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18573 predicate(Matcher::vector_length(n) < 8);
18574 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18575 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18576 ins_encode %{
18577 assert(UseAVX > 0, "required");
18578 int vlen_enc = vector_length_encoding(this);
18579 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18580 %}
18581 ins_pipe( pipe_slow );
18582 %}
18583
18584 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18585 predicate(Matcher::vector_length(n) == 8);
18586 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18587 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18588 ins_encode %{
18589 assert(UseAVX > 2, "required");
18590 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18591 %}
18592 ins_pipe( pipe_slow );
18593 %}
18594
18595 instruct onspinwait() %{
18596 match(OnSpinWait);
18597 ins_cost(200);
18598
18599 format %{
18600 $$template
18601 $$emit$$"pause\t! membar_onspinwait"
18602 %}
18603 ins_encode %{
18604 __ pause();
18605 %}
18606 ins_pipe(pipe_slow);
18607 %}
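
// OnSpinWait lowers to pause, the spin-loop hint behind Thread.onSpinWait():
// it throttles speculative execution inside the wait loop, cutting power and
// the memory-order mis-speculation penalty when the loop finally exits.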
18608
18609 // a * b + c
18610 instruct fmaD_reg(regD a, regD b, regD c) %{
18611 match(Set c (FmaD c (Binary a b)));
18612 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18613 ins_cost(150);
18614 ins_encode %{
18615 assert(UseFMA, "Needs FMA instructions support.");
18616 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18617 %}
18618 ins_pipe( pipe_slow );
18619 %}
18620
18621 // a * b + c
18622 instruct fmaF_reg(regF a, regF b, regF c) %{
18623 match(Set c (FmaF c (Binary a b)));
18624 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18625 ins_cost(150);
18626 ins_encode %{
18627 assert(UseFMA, "Needs FMA instructions support.");
18628 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18629 %}
18630 ins_pipe( pipe_slow );
18631 %}
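
// Both FMA rules compute a * b + c fused, i.e. with a single rounding step,
// which is the Math.fma contract; hence the hard assert on UseFMA rather
// than a fallback to a separate multiply and add.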
18632
18633 // ====================VECTOR INSTRUCTIONS=====================================
18634
18635 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18636 instruct MoveVec2Leg(legVec dst, vec src) %{
18637 match(Set dst src);
18638 format %{ "" %}
18639 ins_encode %{
18640 ShouldNotReachHere();
18641 %}
18642 ins_pipe( fpu_reg_reg );
18643 %}
18644
18645 instruct MoveLeg2Vec(vec dst, legVec src) %{
18646 match(Set dst src);
18647 format %{ "" %}
18648 ins_encode %{
18649 ShouldNotReachHere();
18650 %}
18651 ins_pipe( fpu_reg_reg );
18652 %}
18653
18654 // ============================================================================
18655
// Load vector (generic operand pattern)
18657 instruct loadV(vec dst, memory mem) %{
18658 match(Set dst (LoadVector mem));
18659 ins_cost(125);
18660 format %{ "load_vector $dst,$mem" %}
18661 ins_encode %{
18662 BasicType bt = Matcher::vector_element_basic_type(this);
18663 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18664 %}
18665 ins_pipe( pipe_slow );
18666 %}
18667
// Store vector (generic operand pattern).
18669 instruct storeV(memory mem, vec src) %{
18670 match(Set mem (StoreVector mem src));
18671 ins_cost(145);
18672 format %{ "store_vector $mem,$src\n\t" %}
18673 ins_encode %{
18674 switch (Matcher::vector_length_in_bytes(this, $src)) {
18675 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18676 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18677 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18678 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18679 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18680 default: ShouldNotReachHere();
18681 }
18682 %}
18683 ins_pipe( pipe_slow );
18684 %}
18685
18686 // ---------------------------------------- Gather ------------------------------------
18687
18688 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
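//
// Per lane, a gather performs dst[i] = *(base + scale * idx[i]). The AVX2
// vgather form consumes a vector mask whose lanes are cleared as elements
// arrive, which is why an all-ones mask is rebuilt with vpcmpeqd before
// each use; the AVX-512 evgather form predicates on an opmask register
// instead.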
18689
18690 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18691 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18692 Matcher::vector_length_in_bytes(n) <= 32);
18693 match(Set dst (LoadVectorGather mem idx));
18694 effect(TEMP dst, TEMP tmp, TEMP mask);
18695 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18696 ins_encode %{
18697 int vlen_enc = vector_length_encoding(this);
18698 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18699 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18700 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18701 __ lea($tmp$$Register, $mem$$Address);
18702 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18703 %}
18704 ins_pipe( pipe_slow );
18705 %}
18706
18707
18708 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18709 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18710 !is_subword_type(Matcher::vector_element_basic_type(n)));
18711 match(Set dst (LoadVectorGather mem idx));
18712 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18714 ins_encode %{
18715 int vlen_enc = vector_length_encoding(this);
18716 BasicType elem_bt = Matcher::vector_element_basic_type(this);
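    // kxnor of an opmask register with itself sets all mask bits: gather every lane.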
18717 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18718 __ lea($tmp$$Register, $mem$$Address);
18719 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18720 %}
18721 ins_pipe( pipe_slow );
18722 %}
18723
18724 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18725 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18726 !is_subword_type(Matcher::vector_element_basic_type(n)));
18727 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18728 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18730 ins_encode %{
18731 assert(UseAVX > 2, "sanity");
18732 int vlen_enc = vector_length_encoding(this);
18733 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18734 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
18737 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
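    // Zero dst so that lanes disabled by the mask carry a well-defined value.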
18738 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18739 __ lea($tmp$$Register, $mem$$Address);
18740 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18741 %}
18742 ins_pipe( pipe_slow );
18743 %}
18744
18745 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18746 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18747 match(Set dst (LoadVectorGather mem idx_base));
18748 effect(TEMP tmp, TEMP rtmp);
18749 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18750 ins_encode %{
18751 int vlen_enc = vector_length_encoding(this);
18752 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18753 __ lea($tmp$$Register, $mem$$Address);
18754 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18755 %}
18756 ins_pipe( pipe_slow );
18757 %}
18758
18759 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18760 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18761 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18762 match(Set dst (LoadVectorGather mem idx_base));
18763 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18764 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18765 ins_encode %{
18766 int vlen_enc = vector_length_encoding(this);
18767 int vector_len = Matcher::vector_length(this);
18768 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18769 __ lea($tmp$$Register, $mem$$Address);
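    // Work on a copy so the incoming idx_base register is left unmodified.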
18770 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18771 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18772 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18773 %}
18774 ins_pipe( pipe_slow );
18775 %}
18776
18777 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18778 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18779 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18780 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18781 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18782 ins_encode %{
18783 int vlen_enc = vector_length_encoding(this);
18784 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18785 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18786 __ lea($tmp$$Register, $mem$$Address);
18787 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18788 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18789 %}
18790 ins_pipe( pipe_slow );
18791 %}
18792
18793 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18794 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18795 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18796 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18797 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18798 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18799 ins_encode %{
18800 int vlen_enc = vector_length_encoding(this);
18801 int vector_len = Matcher::vector_length(this);
18802 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18803 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18804 __ lea($tmp$$Register, $mem$$Address);
18805 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18806 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18807 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18808 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18809 %}
18810 ins_pipe( pipe_slow );
18811 %}
18812
18813 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18814 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18815 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18816 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18817 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18818 ins_encode %{
18819 int vlen_enc = vector_length_encoding(this);
18820 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18821 __ lea($tmp$$Register, $mem$$Address);
18822 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
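    // vpmovmskb yields one mask bit per byte; for shorts, pext with 0x55555555
    // keeps every other bit, leaving one mask bit per 16-bit element.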
18823 if (elem_bt == T_SHORT) {
18824 __ movl($mask_idx$$Register, 0x55555555);
18825 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18826 }
18827 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18828 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18829 %}
18830 ins_pipe( pipe_slow );
18831 %}
18832
18833 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18834 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18835 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18836 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18837 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18838 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18839 ins_encode %{
18840 int vlen_enc = vector_length_encoding(this);
18841 int vector_len = Matcher::vector_length(this);
18842 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18843 __ lea($tmp$$Register, $mem$$Address);
18844 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18845 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
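    // As in vgather_masked_subwordLE8B_avx2: compress the byte-granular
    // vpmovmskb result to one mask bit per short element.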
18846 if (elem_bt == T_SHORT) {
18847 __ movl($mask_idx$$Register, 0x55555555);
18848 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18849 }
18850 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18851 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18852 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18853 %}
18854 ins_pipe( pipe_slow );
18855 %}
18856
18857 // ====================Scatter=======================================
18858
18859 // Scatter INT, LONG, FLOAT, DOUBLE
18860
18861 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18862 predicate(UseAVX > 2);
18863 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18864 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18866 ins_encode %{
18867 int vlen_enc = vector_length_encoding(this, $src);
18868 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18869
18870 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18871 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18872
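    // Load an all-ones opmask so that every lane is scattered.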
18873 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18874 __ lea($tmp$$Register, $mem$$Address);
18875 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18876 %}
18877 ins_pipe( pipe_slow );
18878 %}
18879
18880 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18881 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18882 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
18884 ins_encode %{
18885 int vlen_enc = vector_length_encoding(this, $src);
18886 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18887 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18888 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
18891 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18892 __ lea($tmp$$Register, $mem$$Address);
18893 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18894 %}
18895 ins_pipe( pipe_slow );
18896 %}
18897
18898 // ====================REPLICATE=======================================
18899
// Replicate byte scalar into all elements of a byte vector
18901 instruct vReplB_reg(vec dst, rRegI src) %{
18902 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18903 match(Set dst (Replicate src));
18904 format %{ "replicateB $dst,$src" %}
18905 ins_encode %{
18906 uint vlen = Matcher::vector_length(this);
18907 if (UseAVX >= 2) {
18908 int vlen_enc = vector_length_encoding(this);
18909 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18910 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18911 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18912 } else {
18913 __ movdl($dst$$XMMRegister, $src$$Register);
18914 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18915 }
18916 } else {
18917 assert(UseAVX < 2, "");
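      // SSE2: duplicate the byte through the low 8 lanes (punpcklbw + pshuflw),
      // then widen to 16 bytes with punpcklqdq if needed.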
18918 __ movdl($dst$$XMMRegister, $src$$Register);
18919 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18920 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18921 if (vlen >= 16) {
18922 assert(vlen == 16, "");
18923 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18924 }
18925 }
18926 %}
18927 ins_pipe( pipe_slow );
18928 %}
18929
18930 instruct ReplB_mem(vec dst, memory mem) %{
18931 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18932 match(Set dst (Replicate (LoadB mem)));
18933 format %{ "replicateB $dst,$mem" %}
18934 ins_encode %{
18935 int vlen_enc = vector_length_encoding(this);
18936 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18937 %}
18938 ins_pipe( pipe_slow );
18939 %}
18940
18941 // ====================ReplicateS=======================================
18942
18943 instruct vReplS_reg(vec dst, rRegI src) %{
18944 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18945 match(Set dst (Replicate src));
18946 format %{ "replicateS $dst,$src" %}
18947 ins_encode %{
18948 uint vlen = Matcher::vector_length(this);
18949 int vlen_enc = vector_length_encoding(this);
18950 if (UseAVX >= 2) {
18951 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18952 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18953 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18954 } else {
18955 __ movdl($dst$$XMMRegister, $src$$Register);
18956 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18957 }
18958 } else {
18959 assert(UseAVX < 2, "");
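      // SSE2: pshuflw broadcasts the short across the low 4 words, then
      // punpcklqdq widens to 16 bytes if needed.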
18960 __ movdl($dst$$XMMRegister, $src$$Register);
18961 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18962 if (vlen >= 8) {
18963 assert(vlen == 8, "");
18964 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18965 }
18966 }
18967 %}
18968 ins_pipe( pipe_slow );
18969 %}
18970
18971 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18972 match(Set dst (Replicate con));
18973 effect(TEMP rtmp);
18974 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18975 ins_encode %{
18976 int vlen_enc = vector_length_encoding(this);
18977 BasicType bt = Matcher::vector_element_basic_type(this);
18978 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18979 __ movl($rtmp$$Register, $con$$constant);
18980 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18981 %}
18982 ins_pipe( pipe_slow );
18983 %}
18984
18985 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18986 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18987 match(Set dst (Replicate src));
18988 effect(TEMP rtmp);
18989 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18990 ins_encode %{
18991 int vlen_enc = vector_length_encoding(this);
18992 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18993 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18994 %}
18995 ins_pipe( pipe_slow );
18996 %}
18997
18998 instruct ReplS_mem(vec dst, memory mem) %{
18999 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19000 match(Set dst (Replicate (LoadS mem)));
19001 format %{ "replicateS $dst,$mem" %}
19002 ins_encode %{
19003 int vlen_enc = vector_length_encoding(this);
19004 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19005 %}
19006 ins_pipe( pipe_slow );
19007 %}
19008
19009 // ====================ReplicateI=======================================
19010
19011 instruct ReplI_reg(vec dst, rRegI src) %{
19012 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19013 match(Set dst (Replicate src));
19014 format %{ "replicateI $dst,$src" %}
19015 ins_encode %{
19016 uint vlen = Matcher::vector_length(this);
19017 int vlen_enc = vector_length_encoding(this);
19018 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19019 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19020 } else if (VM_Version::supports_avx2()) {
19021 __ movdl($dst$$XMMRegister, $src$$Register);
19022 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19023 } else {
19024 __ movdl($dst$$XMMRegister, $src$$Register);
19025 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19026 }
19027 %}
19028 ins_pipe( pipe_slow );
19029 %}
19030
19031 instruct ReplI_mem(vec dst, memory mem) %{
19032 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19033 match(Set dst (Replicate (LoadI mem)));
19034 format %{ "replicateI $dst,$mem" %}
19035 ins_encode %{
19036 int vlen_enc = vector_length_encoding(this);
19037 if (VM_Version::supports_avx2()) {
19038 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19039 } else if (VM_Version::supports_avx()) {
19040 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19041 } else {
19042 __ movdl($dst$$XMMRegister, $mem$$Address);
19043 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19044 }
19045 %}
19046 ins_pipe( pipe_slow );
19047 %}
19048
19049 instruct ReplI_imm(vec dst, immI con) %{
19050 predicate(Matcher::is_non_long_integral_vector(n));
19051 match(Set dst (Replicate con));
19052 format %{ "replicateI $dst,$con" %}
19053 ins_encode %{
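    // The last argument of vreplicate_imm is the number of copies placed in
    // the constant table: enough to fill 4 bytes when AVX can broadcast,
    // 8 bytes for SSE3's movddup, or a full 16 bytes for a plain SSE2 load.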
19054 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19055 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19056 type2aelembytes(Matcher::vector_element_basic_type(this))));
19057 BasicType bt = Matcher::vector_element_basic_type(this);
19058 int vlen = Matcher::vector_length_in_bytes(this);
19059 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19060 %}
19061 ins_pipe( pipe_slow );
19062 %}
19063
// Replicate scalar zero into all elements of a vector
19065 instruct ReplI_zero(vec dst, immI_0 zero) %{
19066 predicate(Matcher::is_non_long_integral_vector(n));
19067 match(Set dst (Replicate zero));
19068 format %{ "replicateI $dst,$zero" %}
19069 ins_encode %{
19070 int vlen_enc = vector_length_encoding(this);
19071 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19072 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19073 } else {
19074 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19075 }
19076 %}
19077 ins_pipe( fpu_reg_reg );
19078 %}
19079
19080 instruct ReplI_M1(vec dst, immI_M1 con) %{
19081 predicate(Matcher::is_non_long_integral_vector(n));
19082 match(Set dst (Replicate con));
19083 format %{ "vallones $dst" %}
19084 ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vlen_enc);
19087 %}
19088 ins_pipe( pipe_slow );
19089 %}
19090
19091 // ====================ReplicateL=======================================
19092
// Replicate long (8 byte) scalar into all elements of a long vector
19094 instruct ReplL_reg(vec dst, rRegL src) %{
19095 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19096 match(Set dst (Replicate src));
19097 format %{ "replicateL $dst,$src" %}
19098 ins_encode %{
19099 int vlen = Matcher::vector_length(this);
19100 int vlen_enc = vector_length_encoding(this);
19101 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19102 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19103 } else if (VM_Version::supports_avx2()) {
19104 __ movdq($dst$$XMMRegister, $src$$Register);
19105 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19106 } else {
19107 __ movdq($dst$$XMMRegister, $src$$Register);
19108 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19109 }
19110 %}
19111 ins_pipe( pipe_slow );
19112 %}
19113
19114 instruct ReplL_mem(vec dst, memory mem) %{
19115 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19116 match(Set dst (Replicate (LoadL mem)));
19117 format %{ "replicateL $dst,$mem" %}
19118 ins_encode %{
19119 int vlen_enc = vector_length_encoding(this);
19120 if (VM_Version::supports_avx2()) {
19121 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19122 } else if (VM_Version::supports_sse3()) {
19123 __ movddup($dst$$XMMRegister, $mem$$Address);
19124 } else {
19125 __ movq($dst$$XMMRegister, $mem$$Address);
19126 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19127 }
19128 %}
19129 ins_pipe( pipe_slow );
19130 %}
19131
// Replicate long (8 byte) scalar immediate into all elements of a vector by loading it from the constant table.
19133 instruct ReplL_imm(vec dst, immL con) %{
19134 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19135 match(Set dst (Replicate con));
19136 format %{ "replicateL $dst,$con" %}
19137 ins_encode %{
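    // One 8-byte copy suffices when SSE3's movddup is available; otherwise two
    // copies form a full 16-byte load.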
19138 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19139 int vlen = Matcher::vector_length_in_bytes(this);
19140 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19141 %}
19142 ins_pipe( pipe_slow );
19143 %}
19144
19145 instruct ReplL_zero(vec dst, immL0 zero) %{
19146 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19147 match(Set dst (Replicate zero));
19148 format %{ "replicateL $dst,$zero" %}
19149 ins_encode %{
19150 int vlen_enc = vector_length_encoding(this);
19151 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19152 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19153 } else {
19154 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19155 }
19156 %}
19157 ins_pipe( fpu_reg_reg );
19158 %}
19159
19160 instruct ReplL_M1(vec dst, immL_M1 con) %{
19161 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19162 match(Set dst (Replicate con));
19163 format %{ "vallones $dst" %}
19164 ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vlen_enc);
19167 %}
19168 ins_pipe( pipe_slow );
19169 %}
19170
19171 // ====================ReplicateF=======================================
19172
19173 instruct vReplF_reg(vec dst, vlRegF src) %{
19174 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19175 match(Set dst (Replicate src));
19176 format %{ "replicateF $dst,$src" %}
19177 ins_encode %{
19178 uint vlen = Matcher::vector_length(this);
19179 int vlen_enc = vector_length_encoding(this);
19180 if (vlen <= 4) {
19181 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19182 } else if (VM_Version::supports_avx2()) {
19183 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19184 } else {
19185 assert(vlen == 8, "sanity");
19186 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19187 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19188 }
19189 %}
19190 ins_pipe( pipe_slow );
19191 %}
19192
19193 instruct ReplF_reg(vec dst, vlRegF src) %{
19194 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19195 match(Set dst (Replicate src));
19196 format %{ "replicateF $dst,$src" %}
19197 ins_encode %{
19198 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19199 %}
19200 ins_pipe( pipe_slow );
19201 %}
19202
19203 instruct ReplF_mem(vec dst, memory mem) %{
19204 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19205 match(Set dst (Replicate (LoadF mem)));
19206 format %{ "replicateF $dst,$mem" %}
19207 ins_encode %{
19208 int vlen_enc = vector_length_encoding(this);
19209 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19210 %}
19211 ins_pipe( pipe_slow );
19212 %}
19213
// Replicate float scalar immediate into all elements of a vector by loading it from the constant table.
19215 instruct ReplF_imm(vec dst, immF con) %{
19216 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19217 match(Set dst (Replicate con));
19218 format %{ "replicateF $dst,$con" %}
19219 ins_encode %{
19220 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19221 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19222 int vlen = Matcher::vector_length_in_bytes(this);
19223 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19224 %}
19225 ins_pipe( pipe_slow );
19226 %}
19227
19228 instruct ReplF_zero(vec dst, immF0 zero) %{
19229 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19230 match(Set dst (Replicate zero));
19231 format %{ "replicateF $dst,$zero" %}
19232 ins_encode %{
19233 int vlen_enc = vector_length_encoding(this);
19234 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19235 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19236 } else {
19237 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19238 }
19239 %}
19240 ins_pipe( fpu_reg_reg );
19241 %}
19242
19243 // ====================ReplicateD=======================================
19244
// Replicate double (8 byte) scalar into all elements of a double vector
19246 instruct vReplD_reg(vec dst, vlRegD src) %{
19247 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19248 match(Set dst (Replicate src));
19249 format %{ "replicateD $dst,$src" %}
19250 ins_encode %{
19251 uint vlen = Matcher::vector_length(this);
19252 int vlen_enc = vector_length_encoding(this);
19253 if (vlen <= 2) {
19254 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19255 } else if (VM_Version::supports_avx2()) {
19256 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19257 } else {
19258 assert(vlen == 4, "sanity");
19259 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19260 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19261 }
19262 %}
19263 ins_pipe( pipe_slow );
19264 %}
19265
19266 instruct ReplD_reg(vec dst, vlRegD src) %{
19267 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19268 match(Set dst (Replicate src));
19269 format %{ "replicateD $dst,$src" %}
19270 ins_encode %{
19271 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19272 %}
19273 ins_pipe( pipe_slow );
19274 %}
19275
19276 instruct ReplD_mem(vec dst, memory mem) %{
19277 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19278 match(Set dst (Replicate (LoadD mem)));
19279 format %{ "replicateD $dst,$mem" %}
19280 ins_encode %{
19281 if (Matcher::vector_length(this) >= 4) {
19282 int vlen_enc = vector_length_encoding(this);
19283 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19284 } else {
19285 __ movddup($dst$$XMMRegister, $mem$$Address);
19286 }
19287 %}
19288 ins_pipe( pipe_slow );
19289 %}
19290
// Replicate double (8 byte) scalar immediate into all elements of a vector by loading it from the constant table.
19292 instruct ReplD_imm(vec dst, immD con) %{
19293 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19294 match(Set dst (Replicate con));
19295 format %{ "replicateD $dst,$con" %}
19296 ins_encode %{
19297 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19298 int vlen = Matcher::vector_length_in_bytes(this);
19299 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19300 %}
19301 ins_pipe( pipe_slow );
19302 %}
19303
19304 instruct ReplD_zero(vec dst, immD0 zero) %{
19305 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19306 match(Set dst (Replicate zero));
19307 format %{ "replicateD $dst,$zero" %}
19308 ins_encode %{
19309 int vlen_enc = vector_length_encoding(this);
19310 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19311 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19312 } else {
19313 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19314 }
19315 %}
19316 ins_pipe( fpu_reg_reg );
19317 %}
19318
19319 // ====================VECTOR INSERT=======================================
19320
19321 instruct insert(vec dst, rRegI val, immU8 idx) %{
19322 predicate(Matcher::vector_length_in_bytes(n) < 32);
19323 match(Set dst (VectorInsert (Binary dst val) idx));
19324 format %{ "vector_insert $dst,$val,$idx" %}
19325 ins_encode %{
19326 assert(UseSSE >= 4, "required");
19327 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19328
19329 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19330
19331 assert(is_integral_type(elem_bt), "");
19332 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19333
19334 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19335 %}
19336 ins_pipe( pipe_slow );
19337 %}
19338
19339 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19340 predicate(Matcher::vector_length_in_bytes(n) == 32);
19341 match(Set dst (VectorInsert (Binary src val) idx));
19342 effect(TEMP vtmp);
19343 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19344 ins_encode %{
19346 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19347 int elem_per_lane = 16/type2aelembytes(elem_bt);
19348 int log2epr = log2(elem_per_lane);
19349
19350 assert(is_integral_type(elem_bt), "sanity");
19351 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19352
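    // y_idx selects the 128-bit lane, x_idx the element within that lane.
    // E.g. for T_SHORT (8 elements per lane) and idx == 11: x_idx == 3, y_idx == 1.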
19353 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19354 uint y_idx = ($idx$$constant >> log2epr) & 1;
19355 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19356 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19357 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19358 %}
19359 ins_pipe( pipe_slow );
19360 %}
19361
19362 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19363 predicate(Matcher::vector_length_in_bytes(n) == 64);
19364 match(Set dst (VectorInsert (Binary src val) idx));
19365 effect(TEMP vtmp);
19366 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19367 ins_encode %{
19368 assert(UseAVX > 2, "sanity");
19369
19370 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19371 int elem_per_lane = 16/type2aelembytes(elem_bt);
19372 int log2epr = log2(elem_per_lane);
19373
19374 assert(is_integral_type(elem_bt), "");
19375 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19376
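    // As in insert32, but y_idx now selects one of the four 128-bit lanes of a
    // 512-bit vector.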
19377 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19378 uint y_idx = ($idx$$constant >> log2epr) & 3;
19379 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19380 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19381 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19382 %}
19383 ins_pipe( pipe_slow );
19384 %}
19385
19386 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19387 predicate(Matcher::vector_length(n) == 2);
19388 match(Set dst (VectorInsert (Binary dst val) idx));
19389 format %{ "vector_insert $dst,$val,$idx" %}
19390 ins_encode %{
19391 assert(UseSSE >= 4, "required");
19392 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19393 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19394
19395 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19396 %}
19397 ins_pipe( pipe_slow );
19398 %}
19399
19400 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19401 predicate(Matcher::vector_length(n) == 4);
19402 match(Set dst (VectorInsert (Binary src val) idx));
19403 effect(TEMP vtmp);
19404 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19405 ins_encode %{
19406 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19407 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19408
19409 uint x_idx = $idx$$constant & right_n_bits(1);
19410 uint y_idx = ($idx$$constant >> 1) & 1;
19412 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19413 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19414 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19415 %}
19416 ins_pipe( pipe_slow );
19417 %}
19418
19419 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19420 predicate(Matcher::vector_length(n) == 8);
19421 match(Set dst (VectorInsert (Binary src val) idx));
19422 effect(TEMP vtmp);
19423 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19424 ins_encode %{
19425 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19426 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19427
19428 uint x_idx = $idx$$constant & right_n_bits(1);
19429 uint y_idx = ($idx$$constant >> 1) & 3;
19430 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19431 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19432 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19433 %}
19434 ins_pipe( pipe_slow );
19435 %}
19436
19437 instruct insertF(vec dst, regF val, immU8 idx) %{
19438 predicate(Matcher::vector_length(n) < 8);
19439 match(Set dst (VectorInsert (Binary dst val) idx));
19440 format %{ "vector_insert $dst,$val,$idx" %}
19441 ins_encode %{
19442 assert(UseSSE >= 4, "sanity");
19443
19444 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19445 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19446
19447 uint x_idx = $idx$$constant & right_n_bits(2);
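    // insertps imm8 bits [5:4] select the destination element, hence x_idx << 4.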
19448 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19449 %}
19450 ins_pipe( pipe_slow );
19451 %}
19452
19453 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19454 predicate(Matcher::vector_length(n) >= 8);
19455 match(Set dst (VectorInsert (Binary src val) idx));
19456 effect(TEMP vtmp);
19457 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19458 ins_encode %{
19459 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19460 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19461
19462 int vlen = Matcher::vector_length(this);
19463 uint x_idx = $idx$$constant & right_n_bits(2);
19464 if (vlen == 8) {
19465 uint y_idx = ($idx$$constant >> 2) & 1;
19467 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19468 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19469 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19470 } else {
19471 assert(vlen == 16, "sanity");
19472 uint y_idx = ($idx$$constant >> 2) & 3;
19473 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19474 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19475 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19476 }
19477 %}
19478 ins_pipe( pipe_slow );
19479 %}
19480
19481 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19482 predicate(Matcher::vector_length(n) == 2);
19483 match(Set dst (VectorInsert (Binary dst val) idx));
19484 effect(TEMP tmp);
19485 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19486 ins_encode %{
19487 assert(UseSSE >= 4, "sanity");
19488 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19489 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19490
19491 __ movq($tmp$$Register, $val$$XMMRegister);
19492 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19493 %}
19494 ins_pipe( pipe_slow );
19495 %}
19496
19497 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19498 predicate(Matcher::vector_length(n) == 4);
19499 match(Set dst (VectorInsert (Binary src val) idx));
19500 effect(TEMP vtmp, TEMP tmp);
19501 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19502 ins_encode %{
19503 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19504 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19505
19506 uint x_idx = $idx$$constant & right_n_bits(1);
19507 uint y_idx = ($idx$$constant >> 1) & 1;
19509 __ movq($tmp$$Register, $val$$XMMRegister);
19510 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19511 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19512 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19513 %}
19514 ins_pipe( pipe_slow );
19515 %}
19516
19517 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19518 predicate(Matcher::vector_length(n) == 8);
19519 match(Set dst (VectorInsert (Binary src val) idx));
19520 effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19522 ins_encode %{
19523 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19524 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19525
19526 uint x_idx = $idx$$constant & right_n_bits(1);
19527 uint y_idx = ($idx$$constant >> 1) & 3;
19528 __ movq($tmp$$Register, $val$$XMMRegister);
19529 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19530 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19531 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19532 %}
19533 ins_pipe( pipe_slow );
19534 %}
19535
19536 // ====================REDUCTION ARITHMETIC=======================================
19537
19538 // =======================Int Reduction==========================================
19539
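// Reduce all lanes of src2, then fold the scalar input src1 into the result.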
19540 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19541 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19542 match(Set dst (AddReductionVI src1 src2));
19543 match(Set dst (MulReductionVI src1 src2));
19544 match(Set dst (AndReductionV src1 src2));
19545 match(Set dst ( OrReductionV src1 src2));
19546 match(Set dst (XorReductionV src1 src2));
19547 match(Set dst (MinReductionV src1 src2));
19548 match(Set dst (MaxReductionV src1 src2));
19549 effect(TEMP vtmp1, TEMP vtmp2);
19550 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19551 ins_encode %{
19552 int opcode = this->ideal_Opcode();
19553 int vlen = Matcher::vector_length(this, $src2);
19554 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19555 %}
19556 ins_pipe( pipe_slow );
19557 %}
19558
19559 // =======================Long Reduction==========================================
19560
19561 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19562 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19563 match(Set dst (AddReductionVL src1 src2));
19564 match(Set dst (MulReductionVL src1 src2));
19565 match(Set dst (AndReductionV src1 src2));
19566 match(Set dst ( OrReductionV src1 src2));
19567 match(Set dst (XorReductionV src1 src2));
19568 match(Set dst (MinReductionV src1 src2));
19569 match(Set dst (MaxReductionV src1 src2));
19570 effect(TEMP vtmp1, TEMP vtmp2);
19571 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19572 ins_encode %{
19573 int opcode = this->ideal_Opcode();
19574 int vlen = Matcher::vector_length(this, $src2);
19575 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19576 %}
19577 ins_pipe( pipe_slow );
19578 %}
19579
19580 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19581 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19582 match(Set dst (AddReductionVL src1 src2));
19583 match(Set dst (MulReductionVL src1 src2));
19584 match(Set dst (AndReductionV src1 src2));
19585 match(Set dst ( OrReductionV src1 src2));
19586 match(Set dst (XorReductionV src1 src2));
19587 match(Set dst (MinReductionV src1 src2));
19588 match(Set dst (MaxReductionV src1 src2));
19589 effect(TEMP vtmp1, TEMP vtmp2);
19590 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19591 ins_encode %{
19592 int opcode = this->ideal_Opcode();
19593 int vlen = Matcher::vector_length(this, $src2);
19594 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19595 %}
19596 ins_pipe( pipe_slow );
19597 %}
19598
19599 // =======================Float Reduction==========================================
19600
19601 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19602 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19603 match(Set dst (AddReductionVF dst src));
19604 match(Set dst (MulReductionVF dst src));
19605 effect(TEMP dst, TEMP vtmp);
19606 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19607 ins_encode %{
19608 int opcode = this->ideal_Opcode();
19609 int vlen = Matcher::vector_length(this, $src);
19610 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19611 %}
19612 ins_pipe( pipe_slow );
19613 %}
19614
19615 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19616 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19617 match(Set dst (AddReductionVF dst src));
19618 match(Set dst (MulReductionVF dst src));
19619 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19620 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19621 ins_encode %{
19622 int opcode = this->ideal_Opcode();
19623 int vlen = Matcher::vector_length(this, $src);
19624 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19625 %}
19626 ins_pipe( pipe_slow );
19627 %}
19628
19629 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19630 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19631 match(Set dst (AddReductionVF dst src));
19632 match(Set dst (MulReductionVF dst src));
19633 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19634 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19635 ins_encode %{
19636 int opcode = this->ideal_Opcode();
19637 int vlen = Matcher::vector_length(this, $src);
19638 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19639 %}
19640 ins_pipe( pipe_slow );
19641 %}
19642
19643
19644 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19645 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19646 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19647 // src1 contains reduction identity
19648 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19649 match(Set dst (AddReductionVF src1 src2));
19650 match(Set dst (MulReductionVF src1 src2));
19651 effect(TEMP dst);
19652 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19653 ins_encode %{
19654 int opcode = this->ideal_Opcode();
19655 int vlen = Matcher::vector_length(this, $src2);
19656 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19657 %}
19658 ins_pipe( pipe_slow );
19659 %}
19660
19661 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19662 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19663 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19664 // src1 contains reduction identity
19665 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19666 match(Set dst (AddReductionVF src1 src2));
19667 match(Set dst (MulReductionVF src1 src2));
19668 effect(TEMP dst, TEMP vtmp);
19669 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19670 ins_encode %{
19671 int opcode = this->ideal_Opcode();
19672 int vlen = Matcher::vector_length(this, $src2);
19673 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19674 %}
19675 ins_pipe( pipe_slow );
19676 %}
19677
19678 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19679 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19680 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19681 // src1 contains reduction identity
19682 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19683 match(Set dst (AddReductionVF src1 src2));
19684 match(Set dst (MulReductionVF src1 src2));
19685 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19686 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19687 ins_encode %{
19688 int opcode = this->ideal_Opcode();
19689 int vlen = Matcher::vector_length(this, $src2);
19690 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19691 %}
19692 ins_pipe( pipe_slow );
19693 %}
19694
19695 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19696 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19697 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19698 // src1 contains reduction identity
19699 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19700 match(Set dst (AddReductionVF src1 src2));
19701 match(Set dst (MulReductionVF src1 src2));
19702 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19703 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19704 ins_encode %{
19705 int opcode = this->ideal_Opcode();
19706 int vlen = Matcher::vector_length(this, $src2);
19707 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19708 %}
19709 ins_pipe( pipe_slow );
19710 %}
19711
19712 // =======================Double Reduction==========================================
19713
19714 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19715 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19716 match(Set dst (AddReductionVD dst src));
19717 match(Set dst (MulReductionVD dst src));
19718 effect(TEMP dst, TEMP vtmp);
19719 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19720 ins_encode %{
19721 int opcode = this->ideal_Opcode();
19722 int vlen = Matcher::vector_length(this, $src);
19723 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19724 %}
19725 ins_pipe( pipe_slow );
19726 %}
19727
19728 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19729 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19730 match(Set dst (AddReductionVD dst src));
19731 match(Set dst (MulReductionVD dst src));
19732 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19733 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19734 ins_encode %{
19735 int opcode = this->ideal_Opcode();
19736 int vlen = Matcher::vector_length(this, $src);
19737 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19738 %}
19739 ins_pipe( pipe_slow );
19740 %}
19741
19742 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19743 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19744 match(Set dst (AddReductionVD dst src));
19745 match(Set dst (MulReductionVD dst src));
19746 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19747 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19748 ins_encode %{
19749 int opcode = this->ideal_Opcode();
19750 int vlen = Matcher::vector_length(this, $src);
19751 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19752 %}
19753 ins_pipe( pipe_slow );
19754 %}
19755
19756 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19757 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19758 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19759 // src1 contains reduction identity
19760 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19761 match(Set dst (AddReductionVD src1 src2));
19762 match(Set dst (MulReductionVD src1 src2));
19763 effect(TEMP dst);
19764 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19765 ins_encode %{
19766 int opcode = this->ideal_Opcode();
19767 int vlen = Matcher::vector_length(this, $src2);
19768 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19769 %}
19770 ins_pipe( pipe_slow );
19771 %}
19772
19773 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19774 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19775 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19776 // src1 contains reduction identity
19777 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19778 match(Set dst (AddReductionVD src1 src2));
19779 match(Set dst (MulReductionVD src1 src2));
19780 effect(TEMP dst, TEMP vtmp);
19781 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19782 ins_encode %{
19783 int opcode = this->ideal_Opcode();
19784 int vlen = Matcher::vector_length(this, $src2);
19785 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19786 %}
19787 ins_pipe( pipe_slow );
19788 %}
19789
19790 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19791 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19792 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19793 // src1 contains reduction identity
19794 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19795 match(Set dst (AddReductionVD src1 src2));
19796 match(Set dst (MulReductionVD src1 src2));
19797 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19798 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19799 ins_encode %{
19800 int opcode = this->ideal_Opcode();
19801 int vlen = Matcher::vector_length(this, $src2);
19802 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19803 %}
19804 ins_pipe( pipe_slow );
19805 %}
19806
19807 // =======================Byte Reduction==========================================
19808
19809 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19810 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19811 match(Set dst (AddReductionVI src1 src2));
19812 match(Set dst (AndReductionV src1 src2));
19813 match(Set dst ( OrReductionV src1 src2));
19814 match(Set dst (XorReductionV src1 src2));
19815 match(Set dst (MinReductionV src1 src2));
19816 match(Set dst (MaxReductionV src1 src2));
19817 effect(TEMP vtmp1, TEMP vtmp2);
19818 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19819 ins_encode %{
19820 int opcode = this->ideal_Opcode();
19821 int vlen = Matcher::vector_length(this, $src2);
19822 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19823 %}
19824 ins_pipe( pipe_slow );
19825 %}
19826
19827 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19828 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19829 match(Set dst (AddReductionVI src1 src2));
19830 match(Set dst (AndReductionV src1 src2));
19831 match(Set dst ( OrReductionV src1 src2));
19832 match(Set dst (XorReductionV src1 src2));
19833 match(Set dst (MinReductionV src1 src2));
19834 match(Set dst (MaxReductionV src1 src2));
19835 effect(TEMP vtmp1, TEMP vtmp2);
19836 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19837 ins_encode %{
19838 int opcode = this->ideal_Opcode();
19839 int vlen = Matcher::vector_length(this, $src2);
19840 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19841 %}
19842 ins_pipe( pipe_slow );
19843 %}
19844
19845 // =======================Short Reduction==========================================
19846
19847 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19848 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19849 match(Set dst (AddReductionVI src1 src2));
19850 match(Set dst (MulReductionVI src1 src2));
19851 match(Set dst (AndReductionV src1 src2));
19852 match(Set dst ( OrReductionV src1 src2));
19853 match(Set dst (XorReductionV src1 src2));
19854 match(Set dst (MinReductionV src1 src2));
19855 match(Set dst (MaxReductionV src1 src2));
19856 effect(TEMP vtmp1, TEMP vtmp2);
19857 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19858 ins_encode %{
19859 int opcode = this->ideal_Opcode();
19860 int vlen = Matcher::vector_length(this, $src2);
19861 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19862 %}
19863 ins_pipe( pipe_slow );
19864 %}
19865
19866 // =======================Mul Reduction==========================================
19867
19868 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19869 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19870 Matcher::vector_length(n->in(2)) <= 32); // src2
19871 match(Set dst (MulReductionVI src1 src2));
19872 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19873 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19874 ins_encode %{
19875 int opcode = this->ideal_Opcode();
19876 int vlen = Matcher::vector_length(this, $src2);
19877 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19878 %}
19879 ins_pipe( pipe_slow );
19880 %}
19881
19882 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19883 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19884 Matcher::vector_length(n->in(2)) == 64); // src2
19885 match(Set dst (MulReductionVI src1 src2));
19886 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19887 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19888 ins_encode %{
19889 int opcode = this->ideal_Opcode();
19890 int vlen = Matcher::vector_length(this, $src2);
19891 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19892 %}
19893 ins_pipe( pipe_slow );
19894 %}
19895
19896 //--------------------Min/Max Float Reduction --------------------
// Float min/max reduction. src1 holds the reduction identity (+Inf for min,
// -Inf for max), which the predicate enforces, so the encoding ignores it.
19898 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19899 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19900 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19901 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19902 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19903 Matcher::vector_length(n->in(2)) == 2);
19904 match(Set dst (MinReductionV src1 src2));
19905 match(Set dst (MaxReductionV src1 src2));
19906 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19907 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19908 ins_encode %{
19909 assert(UseAVX > 0, "sanity");
19910
19911 int opcode = this->ideal_Opcode();
19912 int vlen = Matcher::vector_length(this, $src2);
19913 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19914 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19915 %}
19916 ins_pipe( pipe_slow );
19917 %}
19918
19919 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19920 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19921 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19922 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19923 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19924 Matcher::vector_length(n->in(2)) >= 4);
19925 match(Set dst (MinReductionV src1 src2));
19926 match(Set dst (MaxReductionV src1 src2));
19927 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19928 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19929 ins_encode %{
19930 assert(UseAVX > 0, "sanity");
19931
19932 int opcode = this->ideal_Opcode();
19933 int vlen = Matcher::vector_length(this, $src2);
19934 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19935 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19936 %}
19937 ins_pipe( pipe_slow );
19938 %}
19939
19940 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19941 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19942 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19943 Matcher::vector_length(n->in(2)) == 2);
19944 match(Set dst (MinReductionV dst src));
19945 match(Set dst (MaxReductionV dst src));
19946 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19947 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19948 ins_encode %{
19949 assert(UseAVX > 0, "sanity");
19950
19951 int opcode = this->ideal_Opcode();
19952 int vlen = Matcher::vector_length(this, $src);
19953 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19954 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19955 %}
19956 ins_pipe( pipe_slow );
19957 %}
19958
19959
19960 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19961 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19962 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19963 Matcher::vector_length(n->in(2)) >= 4);
19964 match(Set dst (MinReductionV dst src));
19965 match(Set dst (MaxReductionV dst src));
19966 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19967 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19968 ins_encode %{
19969 assert(UseAVX > 0, "sanity");
19970
19971 int opcode = this->ideal_Opcode();
19972 int vlen = Matcher::vector_length(this, $src);
19973 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19974 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19975 %}
19976 ins_pipe( pipe_slow );
19977 %}
19978
19979 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19980 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19981 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19982 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19983 Matcher::vector_length(n->in(2)) == 2);
19984 match(Set dst (MinReductionV src1 src2));
19985 match(Set dst (MaxReductionV src1 src2));
19986 effect(TEMP dst, TEMP xtmp1);
19987 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19988 ins_encode %{
19989 int opcode = this->ideal_Opcode();
19990 int vlen = Matcher::vector_length(this, $src2);
19991 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19992 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19993 %}
19994 ins_pipe( pipe_slow );
19995 %}
19996
19997 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19998 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19999 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20000 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20001 Matcher::vector_length(n->in(2)) >= 4);
20002 match(Set dst (MinReductionV src1 src2));
20003 match(Set dst (MaxReductionV src1 src2));
20004 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20005 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20006 ins_encode %{
20007 int opcode = this->ideal_Opcode();
20008 int vlen = Matcher::vector_length(this, $src2);
20009 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20010 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20011 %}
20012 ins_pipe( pipe_slow );
20013 %}
20014
20015 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20016 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20017 Matcher::vector_length(n->in(2)) == 2);
20018 match(Set dst (MinReductionV dst src));
20019 match(Set dst (MaxReductionV dst src));
20020 effect(TEMP dst, TEMP xtmp1);
20021 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20022 ins_encode %{
20023 int opcode = this->ideal_Opcode();
20024 int vlen = Matcher::vector_length(this, $src);
20025 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20026 $xtmp1$$XMMRegister);
20027 %}
20028 ins_pipe( pipe_slow );
20029 %}
20030
20031 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20032 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20033 Matcher::vector_length(n->in(2)) >= 4);
20034 match(Set dst (MinReductionV dst src));
20035 match(Set dst (MaxReductionV dst src));
20036 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20037 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20038 ins_encode %{
20039 int opcode = this->ideal_Opcode();
20040 int vlen = Matcher::vector_length(this, $src);
20041 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20042 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20043 %}
20044 ins_pipe( pipe_slow );
20045 %}
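
// Note: the *_avx10_2 flavors above lean on the AVX10.2 VMINMAX* instructions,
// which implement the required NaN/-0.0 ordering directly; this is presumably
// why the blend temporaries of the legacy paths are passed as xnoreg.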
20046
20047 //--------------------Min/Max Double Reduction --------------------
20048 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20049 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20050 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20051 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20052 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20053 Matcher::vector_length(n->in(2)) == 2);
20054 match(Set dst (MinReductionV src1 src2));
20055 match(Set dst (MaxReductionV src1 src2));
20056 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20057 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20058 ins_encode %{
20059 assert(UseAVX > 0, "sanity");
20060
20061 int opcode = this->ideal_Opcode();
20062 int vlen = Matcher::vector_length(this, $src2);
20063 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20064 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20065 %}
20066 ins_pipe( pipe_slow );
20067 %}
20068
20069 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20070 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20071 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20072 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20073 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20074 Matcher::vector_length(n->in(2)) >= 4);
20075 match(Set dst (MinReductionV src1 src2));
20076 match(Set dst (MaxReductionV src1 src2));
20077 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20078 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20079 ins_encode %{
20080 assert(UseAVX > 0, "sanity");
20081
20082 int opcode = this->ideal_Opcode();
20083 int vlen = Matcher::vector_length(this, $src2);
20084 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20085 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20086 %}
20087 ins_pipe( pipe_slow );
20088 %}
20089
20090
20091 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20092 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20093 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20094 Matcher::vector_length(n->in(2)) == 2);
20095 match(Set dst (MinReductionV dst src));
20096 match(Set dst (MaxReductionV dst src));
20097 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20098 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20099 ins_encode %{
20100 assert(UseAVX > 0, "sanity");
20101
20102 int opcode = this->ideal_Opcode();
20103 int vlen = Matcher::vector_length(this, $src);
20104 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20105 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20106 %}
20107 ins_pipe( pipe_slow );
20108 %}
20109
20110 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20111 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20112 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20113 Matcher::vector_length(n->in(2)) >= 4);
20114 match(Set dst (MinReductionV dst src));
20115 match(Set dst (MaxReductionV dst src));
20116 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20117 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20118 ins_encode %{
20119 assert(UseAVX > 0, "sanity");
20120
20121 int opcode = this->ideal_Opcode();
20122 int vlen = Matcher::vector_length(this, $src);
20123 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20124 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20125 %}
20126 ins_pipe( pipe_slow );
20127 %}
20128
20129 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20130 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20131 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20132 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20133 Matcher::vector_length(n->in(2)) == 2);
20134 match(Set dst (MinReductionV src1 src2));
20135 match(Set dst (MaxReductionV src1 src2));
20136 effect(TEMP dst, TEMP xtmp1);
20137 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20138 ins_encode %{
20139 int opcode = this->ideal_Opcode();
20140 int vlen = Matcher::vector_length(this, $src2);
20141 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20142 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20143 %}
20144 ins_pipe( pipe_slow );
20145 %}
20146
20147 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20148 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20149 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20150 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20151 Matcher::vector_length(n->in(2)) >= 4);
20152 match(Set dst (MinReductionV src1 src2));
20153 match(Set dst (MaxReductionV src1 src2));
20154 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20155 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20156 ins_encode %{
20157 int opcode = this->ideal_Opcode();
20158 int vlen = Matcher::vector_length(this, $src2);
20159 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20160 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20161 %}
20162 ins_pipe( pipe_slow );
20163 %}
20164
20165
20166 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20167 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20168 Matcher::vector_length(n->in(2)) == 2);
20169 match(Set dst (MinReductionV dst src));
20170 match(Set dst (MaxReductionV dst src));
20171 effect(TEMP dst, TEMP xtmp1);
20172 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20173 ins_encode %{
20174 int opcode = this->ideal_Opcode();
20175 int vlen = Matcher::vector_length(this, $src);
20176 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20177 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20178 %}
20179 ins_pipe( pipe_slow );
20180 %}
20181
20182 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20183 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20184 Matcher::vector_length(n->in(2)) >= 4);
20185 match(Set dst (MinReductionV dst src));
20186 match(Set dst (MaxReductionV dst src));
20187 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20188 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20189 ins_encode %{
20190 int opcode = this->ideal_Opcode();
20191 int vlen = Matcher::vector_length(this, $src);
20192 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20193 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20194 %}
20195 ins_pipe( pipe_slow );
20196 %}
20197
20198 // ====================VECTOR ARITHMETIC=======================================
20199
20200 // --------------------------------- ADD --------------------------------------
20201
20202 // Bytes vector add
20203 instruct vaddB(vec dst, vec src) %{
20204 predicate(UseAVX == 0);
20205 match(Set dst (AddVB dst src));
20206 format %{ "paddb $dst,$src\t! add packedB" %}
20207 ins_encode %{
20208 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20209 %}
20210 ins_pipe( pipe_slow );
20211 %}
20212
20213 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20214 predicate(UseAVX > 0);
20215 match(Set dst (AddVB src1 src2));
20216 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20217 ins_encode %{
20218 int vlen_enc = vector_length_encoding(this);
20219 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20220 %}
20221 ins_pipe( pipe_slow );
20222 %}
20223
20224 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20225 predicate((UseAVX > 0) &&
20226 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20227 match(Set dst (AddVB src (LoadVector mem)));
20228 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20229 ins_encode %{
20230 int vlen_enc = vector_length_encoding(this);
20231 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20232 %}
20233 ins_pipe( pipe_slow );
20234 %}
20235
20236 // Shorts/Chars vector add
20237 instruct vaddS(vec dst, vec src) %{
20238 predicate(UseAVX == 0);
20239 match(Set dst (AddVS dst src));
20240 format %{ "paddw $dst,$src\t! add packedS" %}
20241 ins_encode %{
20242 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20243 %}
20244 ins_pipe( pipe_slow );
20245 %}
20246
20247 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20248 predicate(UseAVX > 0);
20249 match(Set dst (AddVS src1 src2));
20250 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20251 ins_encode %{
20252 int vlen_enc = vector_length_encoding(this);
20253 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20254 %}
20255 ins_pipe( pipe_slow );
20256 %}
20257
20258 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20259 predicate((UseAVX > 0) &&
20260 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20261 match(Set dst (AddVS src (LoadVector mem)));
20262 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20263 ins_encode %{
20264 int vlen_enc = vector_length_encoding(this);
20265 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20266 %}
20267 ins_pipe( pipe_slow );
20268 %}
20269
20270 // Integers vector add
20271 instruct vaddI(vec dst, vec src) %{
20272 predicate(UseAVX == 0);
20273 match(Set dst (AddVI dst src));
20274 format %{ "paddd $dst,$src\t! add packedI" %}
20275 ins_encode %{
20276 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20277 %}
20278 ins_pipe( pipe_slow );
20279 %}
20280
20281 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20282 predicate(UseAVX > 0);
20283 match(Set dst (AddVI src1 src2));
20284 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20285 ins_encode %{
20286 int vlen_enc = vector_length_encoding(this);
20287 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20288 %}
20289 ins_pipe( pipe_slow );
20290 %}
20291
20292
20293 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20294 predicate((UseAVX > 0) &&
20295 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20296 match(Set dst (AddVI src (LoadVector mem)));
20297 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20298 ins_encode %{
20299 int vlen_enc = vector_length_encoding(this);
20300 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20301 %}
20302 ins_pipe( pipe_slow );
20303 %}
20304
20305 // Longs vector add
20306 instruct vaddL(vec dst, vec src) %{
20307 predicate(UseAVX == 0);
20308 match(Set dst (AddVL dst src));
20309 format %{ "paddq $dst,$src\t! add packedL" %}
20310 ins_encode %{
20311 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20312 %}
20313 ins_pipe( pipe_slow );
20314 %}
20315
20316 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20317 predicate(UseAVX > 0);
20318 match(Set dst (AddVL src1 src2));
20319 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20320 ins_encode %{
20321 int vlen_enc = vector_length_encoding(this);
20322 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20323 %}
20324 ins_pipe( pipe_slow );
20325 %}
20326
20327 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20328 predicate((UseAVX > 0) &&
20329 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20330 match(Set dst (AddVL src (LoadVector mem)));
20331 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20332 ins_encode %{
20333 int vlen_enc = vector_length_encoding(this);
20334 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20335 %}
20336 ins_pipe( pipe_slow );
20337 %}
20338
20339 // Floats vector add
20340 instruct vaddF(vec dst, vec src) %{
20341 predicate(UseAVX == 0);
20342 match(Set dst (AddVF dst src));
20343 format %{ "addps $dst,$src\t! add packedF" %}
20344 ins_encode %{
20345 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20346 %}
20347 ins_pipe( pipe_slow );
20348 %}
20349
20350 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20351 predicate(UseAVX > 0);
20352 match(Set dst (AddVF src1 src2));
20353 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20354 ins_encode %{
20355 int vlen_enc = vector_length_encoding(this);
20356 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20357 %}
20358 ins_pipe( pipe_slow );
20359 %}
20360
20361 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20362 predicate((UseAVX > 0) &&
20363 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20364 match(Set dst (AddVF src (LoadVector mem)));
20365 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20366 ins_encode %{
20367 int vlen_enc = vector_length_encoding(this);
20368 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20369 %}
20370 ins_pipe( pipe_slow );
20371 %}
20372
20373 // Doubles vector add
20374 instruct vaddD(vec dst, vec src) %{
20375 predicate(UseAVX == 0);
20376 match(Set dst (AddVD dst src));
20377 format %{ "addpd $dst,$src\t! add packedD" %}
20378 ins_encode %{
20379 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20380 %}
20381 ins_pipe( pipe_slow );
20382 %}
20383
20384 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20385 predicate(UseAVX > 0);
20386 match(Set dst (AddVD src1 src2));
20387 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20388 ins_encode %{
20389 int vlen_enc = vector_length_encoding(this);
20390 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20391 %}
20392 ins_pipe( pipe_slow );
20393 %}
20394
20395 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20396 predicate((UseAVX > 0) &&
20397 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20398 match(Set dst (AddVD src (LoadVector mem)));
20399 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20400 ins_encode %{
20401 int vlen_enc = vector_length_encoding(this);
20402 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20403 %}
20404 ins_pipe( pipe_slow );
20405 %}
20406
20407 // --------------------------------- SUB --------------------------------------
20408
20409 // Bytes vector sub
20410 instruct vsubB(vec dst, vec src) %{
20411 predicate(UseAVX == 0);
20412 match(Set dst (SubVB dst src));
20413 format %{ "psubb $dst,$src\t! sub packedB" %}
20414 ins_encode %{
20415 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20416 %}
20417 ins_pipe( pipe_slow );
20418 %}
20419
20420 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20421 predicate(UseAVX > 0);
20422 match(Set dst (SubVB src1 src2));
20423 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20424 ins_encode %{
20425 int vlen_enc = vector_length_encoding(this);
20426 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20427 %}
20428 ins_pipe( pipe_slow );
20429 %}
20430
20431 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20432 predicate((UseAVX > 0) &&
20433 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20434 match(Set dst (SubVB src (LoadVector mem)));
20435 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20436 ins_encode %{
20437 int vlen_enc = vector_length_encoding(this);
20438 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20439 %}
20440 ins_pipe( pipe_slow );
20441 %}
20442
20443 // Shorts/Chars vector sub
20444 instruct vsubS(vec dst, vec src) %{
20445 predicate(UseAVX == 0);
20446 match(Set dst (SubVS dst src));
20447 format %{ "psubw $dst,$src\t! sub packedS" %}
20448 ins_encode %{
20449 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20450 %}
20451 ins_pipe( pipe_slow );
20452 %}
20453
20454
20455 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20456 predicate(UseAVX > 0);
20457 match(Set dst (SubVS src1 src2));
20458 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20459 ins_encode %{
20460 int vlen_enc = vector_length_encoding(this);
20461 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20462 %}
20463 ins_pipe( pipe_slow );
20464 %}
20465
20466 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20467 predicate((UseAVX > 0) &&
20468 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20469 match(Set dst (SubVS src (LoadVector mem)));
20470 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20471 ins_encode %{
20472 int vlen_enc = vector_length_encoding(this);
20473 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20474 %}
20475 ins_pipe( pipe_slow );
20476 %}
20477
20478 // Integers vector sub
20479 instruct vsubI(vec dst, vec src) %{
20480 predicate(UseAVX == 0);
20481 match(Set dst (SubVI dst src));
20482 format %{ "psubd $dst,$src\t! sub packedI" %}
20483 ins_encode %{
20484 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20485 %}
20486 ins_pipe( pipe_slow );
20487 %}
20488
20489 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20490 predicate(UseAVX > 0);
20491 match(Set dst (SubVI src1 src2));
20492 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20493 ins_encode %{
20494 int vlen_enc = vector_length_encoding(this);
20495 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20496 %}
20497 ins_pipe( pipe_slow );
20498 %}
20499
20500 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20501 predicate((UseAVX > 0) &&
20502 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20503 match(Set dst (SubVI src (LoadVector mem)));
20504 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20505 ins_encode %{
20506 int vlen_enc = vector_length_encoding(this);
20507 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20508 %}
20509 ins_pipe( pipe_slow );
20510 %}
20511
20512 // Longs vector sub
20513 instruct vsubL(vec dst, vec src) %{
20514 predicate(UseAVX == 0);
20515 match(Set dst (SubVL dst src));
20516 format %{ "psubq $dst,$src\t! sub packedL" %}
20517 ins_encode %{
20518 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20519 %}
20520 ins_pipe( pipe_slow );
20521 %}
20522
20523 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20524 predicate(UseAVX > 0);
20525 match(Set dst (SubVL src1 src2));
20526 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20527 ins_encode %{
20528 int vlen_enc = vector_length_encoding(this);
20529 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20530 %}
20531 ins_pipe( pipe_slow );
20532 %}
20533
20534
20535 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20536 predicate((UseAVX > 0) &&
20537 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20538 match(Set dst (SubVL src (LoadVector mem)));
20539 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20540 ins_encode %{
20541 int vlen_enc = vector_length_encoding(this);
20542 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20543 %}
20544 ins_pipe( pipe_slow );
20545 %}
20546
20547 // Floats vector sub
20548 instruct vsubF(vec dst, vec src) %{
20549 predicate(UseAVX == 0);
20550 match(Set dst (SubVF dst src));
20551 format %{ "subps $dst,$src\t! sub packedF" %}
20552 ins_encode %{
20553 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20554 %}
20555 ins_pipe( pipe_slow );
20556 %}
20557
20558 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20559 predicate(UseAVX > 0);
20560 match(Set dst (SubVF src1 src2));
20561 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20562 ins_encode %{
20563 int vlen_enc = vector_length_encoding(this);
20564 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20565 %}
20566 ins_pipe( pipe_slow );
20567 %}
20568
20569 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20570 predicate((UseAVX > 0) &&
20571 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20572 match(Set dst (SubVF src (LoadVector mem)));
20573 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20574 ins_encode %{
20575 int vlen_enc = vector_length_encoding(this);
20576 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20577 %}
20578 ins_pipe( pipe_slow );
20579 %}
20580
20581 // Doubles vector sub
20582 instruct vsubD(vec dst, vec src) %{
20583 predicate(UseAVX == 0);
20584 match(Set dst (SubVD dst src));
20585 format %{ "subpd $dst,$src\t! sub packedD" %}
20586 ins_encode %{
20587 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20588 %}
20589 ins_pipe( pipe_slow );
20590 %}
20591
20592 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20593 predicate(UseAVX > 0);
20594 match(Set dst (SubVD src1 src2));
20595 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20596 ins_encode %{
20597 int vlen_enc = vector_length_encoding(this);
20598 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20599 %}
20600 ins_pipe( pipe_slow );
20601 %}
20602
20603 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20604 predicate((UseAVX > 0) &&
20605 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20606 match(Set dst (SubVD src (LoadVector mem)));
20607 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20608 ins_encode %{
20609 int vlen_enc = vector_length_encoding(this);
20610 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20611 %}
20612 ins_pipe( pipe_slow );
20613 %}
20614
20615 // --------------------------------- MUL --------------------------------------
20616
20617 // Byte vector mul
20618 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20619 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20620 match(Set dst (MulVB src1 src2));
20621 effect(TEMP dst, TEMP xtmp);
20622 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20623 ins_encode %{
20624 assert(UseSSE > 3, "required");
20625 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20626 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20627 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20628 __ psllw($dst$$XMMRegister, 8);
20629 __ psrlw($dst$$XMMRegister, 8);
20630 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20631 %}
20632 ins_pipe( pipe_slow );
20633 %}
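
// Scalar sketch of the widening trick above (illustrative only): each byte
// product is formed exactly in a 16-bit lane, and Java's wrap-around byte
// multiply keeps only its low 8 bits.
//
//   int16_t prod = (int16_t)(int8_t)a[i] * (int8_t)b[i];  // pmovsxbw + pmullw
//   r[i] = (int8_t)prod;        // psllw/psrlw by 8 + packuswb keep the low byte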
20634
20635 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20636 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20637 match(Set dst (MulVB src1 src2));
20638 effect(TEMP dst, TEMP xtmp);
20639 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20640 ins_encode %{
20641 assert(UseSSE > 3, "required");
20642 // Odd-index elements
20643 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20644 __ psrlw($dst$$XMMRegister, 8);
20645 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20646 __ psrlw($xtmp$$XMMRegister, 8);
20647 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20648 __ psllw($dst$$XMMRegister, 8);
20649 // Even-index elements
20650 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20651 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20652 __ psllw($xtmp$$XMMRegister, 8);
20653 __ psrlw($xtmp$$XMMRegister, 8);
20654 // Combine
20655 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20656 %}
20657 ins_pipe( pipe_slow );
20658 %}
20659
20660 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20661 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20662 match(Set dst (MulVB src1 src2));
20663 effect(TEMP xtmp1, TEMP xtmp2);
20664 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20665 ins_encode %{
20666 int vlen_enc = vector_length_encoding(this);
20667 // Odd-index elements
20668 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20669 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20670 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20671 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20672 // Even-index elements
20673 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20674 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20675 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20676 // Combine
20677 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20678 %}
20679 ins_pipe( pipe_slow );
20680 %}
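
// Scalar sketch of the odd/even split used by the two rules above
// (illustrative only; a and b stand for one 16-bit lane holding two bytes):
//
//   uint16_t odd  = (uint16_t)((a >> 8) * (b >> 8)) << 8;  // product byte moved back up
//   uint16_t even = (uint16_t)((a * b) << 8) >> 8;         // zero the high byte
//   uint16_t r    = odd | even;                            // por / vpor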
20681
20682 // Shorts/Chars vector mul
20683 instruct vmulS(vec dst, vec src) %{
20684 predicate(UseAVX == 0);
20685 match(Set dst (MulVS dst src));
20686 format %{ "pmullw $dst,$src\t! mul packedS" %}
20687 ins_encode %{
20688 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20689 %}
20690 ins_pipe( pipe_slow );
20691 %}
20692
20693 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20694 predicate(UseAVX > 0);
20695 match(Set dst (MulVS src1 src2));
20696 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20697 ins_encode %{
20698 int vlen_enc = vector_length_encoding(this);
20699 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20700 %}
20701 ins_pipe( pipe_slow );
20702 %}
20703
20704 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20705 predicate((UseAVX > 0) &&
20706 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20707 match(Set dst (MulVS src (LoadVector mem)));
20708 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20709 ins_encode %{
20710 int vlen_enc = vector_length_encoding(this);
20711 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20712 %}
20713 ins_pipe( pipe_slow );
20714 %}
20715
20716 // Integers vector mul
20717 instruct vmulI(vec dst, vec src) %{
20718 predicate(UseAVX == 0);
20719 match(Set dst (MulVI dst src));
20720 format %{ "pmulld $dst,$src\t! mul packedI" %}
20721 ins_encode %{
20722 assert(UseSSE > 3, "required");
20723 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20724 %}
20725 ins_pipe( pipe_slow );
20726 %}
20727
20728 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20729 predicate(UseAVX > 0);
20730 match(Set dst (MulVI src1 src2));
20731 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20732 ins_encode %{
20733 int vlen_enc = vector_length_encoding(this);
20734 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20735 %}
20736 ins_pipe( pipe_slow );
20737 %}
20738
20739 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20740 predicate((UseAVX > 0) &&
20741 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20742 match(Set dst (MulVI src (LoadVector mem)));
20743 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20744 ins_encode %{
20745 int vlen_enc = vector_length_encoding(this);
20746 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20747 %}
20748 ins_pipe( pipe_slow );
20749 %}
20750
20751 // Longs vector mul
20752 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20753 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20754 VM_Version::supports_avx512dq()) ||
20755 VM_Version::supports_avx512vldq());
20756 match(Set dst (MulVL src1 src2));
20757 ins_cost(500);
20758 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20759 ins_encode %{
20760 assert(UseAVX > 2, "required");
20761 int vlen_enc = vector_length_encoding(this);
20762 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20763 %}
20764 ins_pipe( pipe_slow );
20765 %}
20766
20767 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20768 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20769 VM_Version::supports_avx512dq()) ||
20770 (Matcher::vector_length_in_bytes(n) > 8 &&
20771 VM_Version::supports_avx512vldq()));
20772 match(Set dst (MulVL src (LoadVector mem)));
20773 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20774 ins_cost(500);
20775 ins_encode %{
20776 assert(UseAVX > 2, "required");
20777 int vlen_enc = vector_length_encoding(this);
20778 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20779 %}
20780 ins_pipe( pipe_slow );
20781 %}
20782
20783 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20784 predicate(UseAVX == 0);
20785 match(Set dst (MulVL src1 src2));
20786 ins_cost(500);
20787 effect(TEMP dst, TEMP xtmp);
20788 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20789 ins_encode %{
20790 assert(VM_Version::supports_sse4_1(), "required");
20791     // Get the lo-hi cross products (0xB1 swaps each lane's 32-bit halves); only the low 32 bits matter
20792 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20793 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20794 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20795 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20796 __ psllq($dst$$XMMRegister, 32);
20797 // Get the lo-lo products
20798 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20799 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20800 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20801 %}
20802 ins_pipe( pipe_slow );
20803 %}
20804
20805 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20806 predicate(UseAVX > 0 &&
20807 ((Matcher::vector_length_in_bytes(n) == 64 &&
20808 !VM_Version::supports_avx512dq()) ||
20809 (Matcher::vector_length_in_bytes(n) < 64 &&
20810 !VM_Version::supports_avx512vldq())));
20811 match(Set dst (MulVL src1 src2));
20812 effect(TEMP xtmp1, TEMP xtmp2);
20813 ins_cost(500);
20814 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20815 ins_encode %{
20816 int vlen_enc = vector_length_encoding(this);
20817     // Get the lo-hi cross products (0xB1 swaps each lane's 32-bit halves); only the low 32 bits matter
20818 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20819 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20820 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20821 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20822 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20823 // Get the lo-lo products
20824 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20825 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20826 %}
20827 ins_pipe( pipe_slow );
20828 %}
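
// Both MulVL rules above decompose a 64x64->64 multiply into 32-bit pieces
// (illustrative sketch; a_lo/a_hi and b_lo/b_hi are the 32-bit lane halves):
//
//   uint32_t cross = a_lo * b_hi + a_hi * b_lo;   // pmulld keeps the low 32 bits
//   uint64_t r = ((uint64_t)cross << 32)          // psllq 32
//              + (uint64_t)a_lo * b_lo;           // pmuludq: full 64-bit product
//
// The a_hi * b_hi term is dropped because it only affects bits >= 64.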
20829
20830 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20831 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20832 match(Set dst (MulVL src1 src2));
20833 ins_cost(100);
20834 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20835 ins_encode %{
20836 int vlen_enc = vector_length_encoding(this);
20837 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20838 %}
20839 ins_pipe( pipe_slow );
20840 %}
20841
20842 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20843 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20844 match(Set dst (MulVL src1 src2));
20845 ins_cost(100);
20846 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20847 ins_encode %{
20848 int vlen_enc = vector_length_encoding(this);
20849 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20850 %}
20851 ins_pipe( pipe_slow );
20852 %}
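
// When C2 can prove both MulVL inputs are zero-extended (has_uint_inputs) or
// sign-extended (has_int_inputs) 32-bit values, the full 64-bit product fits
// in a single vpmuludq/vpmuldq, hence the much lower ins_cost than the
// decomposition above.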
20853
20854 // Floats vector mul
20855 instruct vmulF(vec dst, vec src) %{
20856 predicate(UseAVX == 0);
20857 match(Set dst (MulVF dst src));
20858 format %{ "mulps $dst,$src\t! mul packedF" %}
20859 ins_encode %{
20860 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20861 %}
20862 ins_pipe( pipe_slow );
20863 %}
20864
20865 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20866 predicate(UseAVX > 0);
20867 match(Set dst (MulVF src1 src2));
20868 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20869 ins_encode %{
20870 int vlen_enc = vector_length_encoding(this);
20871 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20872 %}
20873 ins_pipe( pipe_slow );
20874 %}
20875
20876 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20877 predicate((UseAVX > 0) &&
20878 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20879 match(Set dst (MulVF src (LoadVector mem)));
20880 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20881 ins_encode %{
20882 int vlen_enc = vector_length_encoding(this);
20883 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20884 %}
20885 ins_pipe( pipe_slow );
20886 %}
20887
20888 // Doubles vector mul
20889 instruct vmulD(vec dst, vec src) %{
20890 predicate(UseAVX == 0);
20891 match(Set dst (MulVD dst src));
20892 format %{ "mulpd $dst,$src\t! mul packedD" %}
20893 ins_encode %{
20894 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20895 %}
20896 ins_pipe( pipe_slow );
20897 %}
20898
20899 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20900 predicate(UseAVX > 0);
20901 match(Set dst (MulVD src1 src2));
20902 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20903 ins_encode %{
20904 int vlen_enc = vector_length_encoding(this);
20905 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20906 %}
20907 ins_pipe( pipe_slow );
20908 %}
20909
20910 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20911 predicate((UseAVX > 0) &&
20912 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20913 match(Set dst (MulVD src (LoadVector mem)));
20914 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20915 ins_encode %{
20916 int vlen_enc = vector_length_encoding(this);
20917 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20918 %}
20919 ins_pipe( pipe_slow );
20920 %}
20921
20922 // --------------------------------- DIV --------------------------------------
20923
20924 // Floats vector div
20925 instruct vdivF(vec dst, vec src) %{
20926 predicate(UseAVX == 0);
20927 match(Set dst (DivVF dst src));
20928 format %{ "divps $dst,$src\t! div packedF" %}
20929 ins_encode %{
20930 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20931 %}
20932 ins_pipe( pipe_slow );
20933 %}
20934
20935 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20936 predicate(UseAVX > 0);
20937 match(Set dst (DivVF src1 src2));
20938 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20939 ins_encode %{
20940 int vlen_enc = vector_length_encoding(this);
20941 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20942 %}
20943 ins_pipe( pipe_slow );
20944 %}
20945
20946 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20947 predicate((UseAVX > 0) &&
20948 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20949 match(Set dst (DivVF src (LoadVector mem)));
20950 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20951 ins_encode %{
20952 int vlen_enc = vector_length_encoding(this);
20953 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20954 %}
20955 ins_pipe( pipe_slow );
20956 %}
20957
20958 // Doubles vector div
20959 instruct vdivD(vec dst, vec src) %{
20960 predicate(UseAVX == 0);
20961 match(Set dst (DivVD dst src));
20962 format %{ "divpd $dst,$src\t! div packedD" %}
20963 ins_encode %{
20964 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20965 %}
20966 ins_pipe( pipe_slow );
20967 %}
20968
20969 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20970 predicate(UseAVX > 0);
20971 match(Set dst (DivVD src1 src2));
20972 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20973 ins_encode %{
20974 int vlen_enc = vector_length_encoding(this);
20975 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20976 %}
20977 ins_pipe( pipe_slow );
20978 %}
20979
20980 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20981 predicate((UseAVX > 0) &&
20982 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20983 match(Set dst (DivVD src (LoadVector mem)));
20984 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20985 ins_encode %{
20986 int vlen_enc = vector_length_encoding(this);
20987 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20988 %}
20989 ins_pipe( pipe_slow );
20990 %}
20991
20992 // ------------------------------ MinMax ---------------------------------------
20993
20994 // Byte, Short, Int vector Min/Max
20995 instruct minmax_reg_sse(vec dst, vec src) %{
20996 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20997 UseAVX == 0);
20998 match(Set dst (MinV dst src));
20999 match(Set dst (MaxV dst src));
21000 format %{ "vector_minmax $dst,$src\t! " %}
21001 ins_encode %{
21002 assert(UseSSE >= 4, "required");
21003
21004 int opcode = this->ideal_Opcode();
21005 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21006 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21007 %}
21008 ins_pipe( pipe_slow );
21009 %}
21010
21011 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21012 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21013 UseAVX > 0);
21014 match(Set dst (MinV src1 src2));
21015 match(Set dst (MaxV src1 src2));
21016 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
21017 ins_encode %{
21018 int opcode = this->ideal_Opcode();
21019 int vlen_enc = vector_length_encoding(this);
21020 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21021
21022 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21023 %}
21024 ins_pipe( pipe_slow );
21025 %}
21026
21027 // Long vector Min/Max
21028 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21029 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21030 UseAVX == 0);
21031 match(Set dst (MinV dst src));
21032 match(Set dst (MaxV src dst));
21033 effect(TEMP dst, TEMP tmp);
21034 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
21035 ins_encode %{
21036 assert(UseSSE >= 4, "required");
21037
21038 int opcode = this->ideal_Opcode();
21039 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21040 assert(elem_bt == T_LONG, "sanity");
21041
21042 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21043 %}
21044 ins_pipe( pipe_slow );
21045 %}
21046
21047 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21048 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21049 UseAVX > 0 && !VM_Version::supports_avx512vl());
21050 match(Set dst (MinV src1 src2));
21051 match(Set dst (MaxV src1 src2));
21052 effect(TEMP dst);
21053 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
21054 ins_encode %{
21055 int vlen_enc = vector_length_encoding(this);
21056 int opcode = this->ideal_Opcode();
21057 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21058 assert(elem_bt == T_LONG, "sanity");
21059
21060 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21061 %}
21062 ins_pipe( pipe_slow );
21063 %}
21064
21065 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21066 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21067 Matcher::vector_element_basic_type(n) == T_LONG);
21068 match(Set dst (MinV src1 src2));
21069 match(Set dst (MaxV src1 src2));
21070 format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
21071 ins_encode %{
21072 assert(UseAVX > 2, "required");
21073
21074 int vlen_enc = vector_length_encoding(this);
21075 int opcode = this->ideal_Opcode();
21076 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21077 assert(elem_bt == T_LONG, "sanity");
21078
21079 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21080 %}
21081 ins_pipe( pipe_slow );
21082 %}
21083
21084 // Float/Double vector Min/Max
21085 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21086 predicate(VM_Version::supports_avx10_2() &&
21087 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21088 match(Set dst (MinV a b));
21089 match(Set dst (MaxV a b));
21090 format %{ "vector_minmaxFP $dst, $a, $b" %}
21091 ins_encode %{
21092 int vlen_enc = vector_length_encoding(this);
21093 int opcode = this->ideal_Opcode();
21094 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21095 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21096 %}
21097 ins_pipe( pipe_slow );
21098 %}
21099
21100 // Float/Double vector Min/Max
21101 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21102 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21103 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21104 UseAVX > 0);
21105 match(Set dst (MinV a b));
21106 match(Set dst (MaxV a b));
21107 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21108 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21109 ins_encode %{
21110 assert(UseAVX > 0, "required");
21111
21112 int opcode = this->ideal_Opcode();
21113 int vlen_enc = vector_length_encoding(this);
21114 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21115
21116 __ vminmax_fp(opcode, elem_bt,
21117 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21118 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21119 %}
21120 ins_pipe( pipe_slow );
21121 %}
21122
21123 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21124 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21125 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21126 match(Set dst (MinV a b));
21127 match(Set dst (MaxV a b));
21128 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21129 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21130 ins_encode %{
21131 assert(UseAVX > 2, "required");
21132
21133 int opcode = this->ideal_Opcode();
21134 int vlen_enc = vector_length_encoding(this);
21135 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21136
21137 __ evminmax_fp(opcode, elem_bt,
21138 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21139 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21140 %}
21141 ins_pipe( pipe_slow );
21142 %}
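
// Note: the pre-AVX10.2 float/double Min/Max rules above need blend
// temporaries because raw minps/maxps return the second source when the
// inputs are unordered or compare equal, while Java requires NaN to
// propagate from either input and min(-0.0, +0.0) to be -0.0.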
21143
21144 // ------------------------------ Unsigned vector Min/Max ----------------------
21145
21146 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21147 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21148 match(Set dst (UMinV a b));
21149 match(Set dst (UMaxV a b));
21150 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21151 ins_encode %{
21152 int opcode = this->ideal_Opcode();
21153 int vlen_enc = vector_length_encoding(this);
21154 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21155 assert(is_integral_type(elem_bt), "");
21156 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21157 %}
21158 ins_pipe( pipe_slow );
21159 %}
21160
21161 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21162 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21163 match(Set dst (UMinV a (LoadVector b)));
21164 match(Set dst (UMaxV a (LoadVector b)));
21165 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21166 ins_encode %{
21167 int opcode = this->ideal_Opcode();
21168 int vlen_enc = vector_length_encoding(this);
21169 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21170 assert(is_integral_type(elem_bt), "");
21171 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21172 %}
21173 ins_pipe( pipe_slow );
21174 %}
21175
21176 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21177 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21178 match(Set dst (UMinV a b));
21179 match(Set dst (UMaxV a b));
21180 effect(TEMP xtmp1, TEMP xtmp2);
21181 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21182 ins_encode %{
21183 int opcode = this->ideal_Opcode();
21184 int vlen_enc = vector_length_encoding(this);
21185 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21186 %}
21187 ins_pipe( pipe_slow );
21188 %}
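
// There is no unsigned 64-bit vector min/max (or unsigned compare) without
// AVX512VL, so vpuminmaxq presumably uses the classic workaround sketched
// below: flip the sign bits, compare signed, and blend.
//
//   bool a_lt_b = (int64_t)(a ^ 0x8000000000000000ULL)
//               < (int64_t)(b ^ 0x8000000000000000ULL);   // unsigned a < b
//   r = (opcode == Op_UMinV) ? (a_lt_b ? a : b) : (a_lt_b ? b : a);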
21189
21190 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21191 match(Set dst (UMinV (Binary dst src2) mask));
21192 match(Set dst (UMaxV (Binary dst src2) mask));
21193 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21194 ins_encode %{
21195 int vlen_enc = vector_length_encoding(this);
21196 BasicType bt = Matcher::vector_element_basic_type(this);
21197 int opc = this->ideal_Opcode();
21198 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21199 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21200 %}
21201 ins_pipe( pipe_slow );
21202 %}
21203
21204 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21205 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21206 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21207 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21208 ins_encode %{
21209 int vlen_enc = vector_length_encoding(this);
21210 BasicType bt = Matcher::vector_element_basic_type(this);
21211 int opc = this->ideal_Opcode();
21212 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21213 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21214 %}
21215 ins_pipe( pipe_slow );
21216 %}
21217
21218 // --------------------------------- Signum/CopySign ---------------------------
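// Math.signum semantics: 1.0 for positive inputs, -1.0 for negative inputs,
// and the input itself for zeroes (sign preserved) and NaN. The zero and one
// operands feed those constants to the stub so it needs no extra loads.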
21219
21220 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21221 match(Set dst (SignumF dst (Binary zero one)));
21222 effect(KILL cr);
21223 format %{ "signumF $dst, $dst" %}
21224 ins_encode %{
21225 int opcode = this->ideal_Opcode();
21226 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21227 %}
21228 ins_pipe( pipe_slow );
21229 %}
21230
21231 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21232 match(Set dst (SignumD dst (Binary zero one)));
21233 effect(KILL cr);
21234 format %{ "signumD $dst, $dst" %}
21235 ins_encode %{
21236 int opcode = this->ideal_Opcode();
21237 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21238 %}
21239 ins_pipe( pipe_slow );
21240 %}
21241
21242 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21243 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21244 match(Set dst (SignumVF src (Binary zero one)));
21245 match(Set dst (SignumVD src (Binary zero one)));
21246 effect(TEMP dst, TEMP xtmp1);
21247 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21248 ins_encode %{
21249 int opcode = this->ideal_Opcode();
21250 int vec_enc = vector_length_encoding(this);
21251 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21252 $xtmp1$$XMMRegister, vec_enc);
21253 %}
21254 ins_pipe( pipe_slow );
21255 %}
21256
21257 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21258 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21259 match(Set dst (SignumVF src (Binary zero one)));
21260 match(Set dst (SignumVD src (Binary zero one)));
21261 effect(TEMP dst, TEMP ktmp1);
21262 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21263 ins_encode %{
21264 int opcode = this->ideal_Opcode();
21265 int vec_enc = vector_length_encoding(this);
21266 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21267 $ktmp1$$KRegister, vec_enc);
21268 %}
21269 ins_pipe( pipe_slow );
21270 %}
21271
21272 // ---------------------------------------
21273 // For copySign use 0xE4 as the truth-table immediate for vpternlog
21274 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21275 // C (xmm2) is set to 0x7FFFFFFF
21276 // Wherever xmm2 is 0, we want to pick from B (the sign input)
21277 // Wherever xmm2 is 1, we want to pick from A (the magnitude input)
21278 //
21279 // A B C Result
21280 // 0 0 0 0
21281 // 0 0 1 0
21282 // 0 1 0 1
21283 // 0 1 1 0
21284 // 1 0 0 0
21285 // 1 0 1 1
21286 // 1 1 0 1
21287 // 1 1 1 1
21288 //
21289 // Reading the Result column from ABC=111 down to 000 gives 0b11100100 = 0xE4
21290 // ---------------------------------------
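
// Scalar equivalent of the vpternlog selection (illustrative):
//
//   uint32_t r = (mag & 0x7FFFFFFF)    // magnitude bits, where C == 1
//              | (sgn & 0x80000000);   // sign bit,       where C == 0
//
// with mag the first CopySign input (in $dst) and sgn the second ($src); the
// double flavor is identical with the 0x7FFFFFFFFFFFFFFF mask.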
21291
21292 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21293 match(Set dst (CopySignF dst src));
21294 effect(TEMP tmp1, TEMP tmp2);
21295 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21296 ins_encode %{
21297 __ movl($tmp2$$Register, 0x7FFFFFFF);
21298 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21299 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21300 %}
21301 ins_pipe( pipe_slow );
21302 %}
21303
21304 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21305 match(Set dst (CopySignD dst (Binary src zero)));
21306 ins_cost(100);
21307 effect(TEMP tmp1, TEMP tmp2);
21308 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21309 ins_encode %{
21310 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21311 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21312 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21313 %}
21314 ins_pipe( pipe_slow );
21315 %}
21316
21317 //----------------------------- CompressBits/ExpandBits ------------------------
21318
21319 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21320 predicate(n->bottom_type()->isa_int());
21321 match(Set dst (CompressBits src mask));
21322 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21323 ins_encode %{
21324 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21325 %}
21326 ins_pipe( pipe_slow );
21327 %}
21328
21329 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21330 predicate(n->bottom_type()->isa_int());
21331 match(Set dst (ExpandBits src mask));
21332 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21333 ins_encode %{
21334 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21335 %}
21336 ins_pipe( pipe_slow );
21337 %}
21338
21339 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21340 predicate(n->bottom_type()->isa_int());
21341 match(Set dst (CompressBits src (LoadI mask)));
21342 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21343 ins_encode %{
21344 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21345 %}
21346 ins_pipe( pipe_slow );
21347 %}
21348
21349 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21350 predicate(n->bottom_type()->isa_int());
21351 match(Set dst (ExpandBits src (LoadI mask)));
21352 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21353 ins_encode %{
21354 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21355 %}
21356 ins_pipe( pipe_slow );
21357 %}
21358
21359 // --------------------------------- Sqrt --------------------------------------
21360
21361 instruct vsqrtF_reg(vec dst, vec src) %{
21362 match(Set dst (SqrtVF src));
21363 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21364 ins_encode %{
21365 assert(UseAVX > 0, "required");
21366 int vlen_enc = vector_length_encoding(this);
21367 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21368 %}
21369 ins_pipe( pipe_slow );
21370 %}
21371
21372 instruct vsqrtF_mem(vec dst, memory mem) %{
21373 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21374 match(Set dst (SqrtVF (LoadVector mem)));
21375 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21376 ins_encode %{
21377 assert(UseAVX > 0, "required");
21378 int vlen_enc = vector_length_encoding(this);
21379 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21380 %}
21381 ins_pipe( pipe_slow );
21382 %}
21383
// Double-precision floating point vector sqrt
21385 instruct vsqrtD_reg(vec dst, vec src) %{
21386 match(Set dst (SqrtVD src));
21387 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21388 ins_encode %{
21389 assert(UseAVX > 0, "required");
21390 int vlen_enc = vector_length_encoding(this);
21391 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21392 %}
21393 ins_pipe( pipe_slow );
21394 %}
21395
21396 instruct vsqrtD_mem(vec dst, memory mem) %{
21397 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21398 match(Set dst (SqrtVD (LoadVector mem)));
21399 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21400 ins_encode %{
21401 assert(UseAVX > 0, "required");
21402 int vlen_enc = vector_length_encoding(this);
21403 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21404 %}
21405 ins_pipe( pipe_slow );
21406 %}
21407
21408 // ------------------------------ Shift ---------------------------------------
21409
// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm register are used for the count).
21412 instruct vshiftcnt(vec dst, rRegI cnt) %{
21413 match(Set dst (LShiftCntV cnt));
21414 match(Set dst (RShiftCntV cnt));
21415 format %{ "movdl $dst,$cnt\t! load shift count" %}
21416 ins_encode %{
21417 __ movdl($dst$$XMMRegister, $cnt$$Register);
21418 %}
21419 ins_pipe( pipe_slow );
21420 %}
21421
21422 // Byte vector shift
21423 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21424 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21425 match(Set dst ( LShiftVB src shift));
21426 match(Set dst ( RShiftVB src shift));
21427 match(Set dst (URShiftVB src shift));
21428 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21429 format %{"vector_byte_shift $dst,$src,$shift" %}
21430 ins_encode %{
21431 assert(UseSSE > 3, "required");
21432 int opcode = this->ideal_Opcode();
21433 bool sign = (opcode != Op_URShiftVB);
21434 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21435 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21436 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21437 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21438 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21439 %}
21440 ins_pipe( pipe_slow );
21441 %}
21442
21443 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21444 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21445 UseAVX <= 1);
21446 match(Set dst ( LShiftVB src shift));
21447 match(Set dst ( RShiftVB src shift));
21448 match(Set dst (URShiftVB src shift));
21449 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21450 format %{"vector_byte_shift $dst,$src,$shift" %}
21451 ins_encode %{
21452 assert(UseSSE > 3, "required");
21453 int opcode = this->ideal_Opcode();
21454 bool sign = (opcode != Op_URShiftVB);
21455 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21456 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21457 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21458 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21459 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21460 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21461 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21462 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21463 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21464 %}
21465 ins_pipe( pipe_slow );
21466 %}
21467
21468 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21469 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21470 UseAVX > 1);
21471 match(Set dst ( LShiftVB src shift));
21472 match(Set dst ( RShiftVB src shift));
21473 match(Set dst (URShiftVB src shift));
21474 effect(TEMP dst, TEMP tmp);
21475 format %{"vector_byte_shift $dst,$src,$shift" %}
21476 ins_encode %{
21477 int opcode = this->ideal_Opcode();
21478 bool sign = (opcode != Op_URShiftVB);
21479 int vlen_enc = Assembler::AVX_256bit;
21480 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21481 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21482 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21483 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21484 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21485 %}
21486 ins_pipe( pipe_slow );
21487 %}
21488
21489 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21490 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21491 match(Set dst ( LShiftVB src shift));
21492 match(Set dst ( RShiftVB src shift));
21493 match(Set dst (URShiftVB src shift));
21494 effect(TEMP dst, TEMP tmp);
21495 format %{"vector_byte_shift $dst,$src,$shift" %}
21496 ins_encode %{
21497 assert(UseAVX > 1, "required");
21498 int opcode = this->ideal_Opcode();
21499 bool sign = (opcode != Op_URShiftVB);
21500 int vlen_enc = Assembler::AVX_256bit;
21501 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21502 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21503 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21504 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21505 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21506 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21507 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21508 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21509 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21510 %}
21511 ins_pipe( pipe_slow );
21512 %}
21513
21514 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21515 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21516 match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
21518 match(Set dst (URShiftVB src shift));
21519 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21520 format %{"vector_byte_shift $dst,$src,$shift" %}
21521 ins_encode %{
21522 assert(UseAVX > 2, "required");
21523 int opcode = this->ideal_Opcode();
21524 bool sign = (opcode != Op_URShiftVB);
21525 int vlen_enc = Assembler::AVX_512bit;
21526 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21527 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21528 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21529 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21530 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21531 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21532 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21533 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21534 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21535 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21536 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21537 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21538 %}
21539 ins_pipe( pipe_slow );
21540 %}
21541
// A shorts vector logical right shift produces an incorrect Java result for negative
// data, because Java code converts a short value to an int with sign extension before
// shifting. Char vectors are fine, however, since chars are unsigned.
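//
// Illustrative example for shift = 1 and s = (short)-4 (0xFFFC): Java computes
// ((int)s) >>> 1 = 0xFFFFFFFC >>> 1 = 0x7FFFFFFE, which narrows back to (short)0xFFFE,
// i.e. -2, whereas a 16-bit lane-wise logical shift yields 0xFFFC >>> 1 = 0x7FFE (32766).
// For a char c = '\uFFFC' the widening is zero-extending, so the lane-wise result matches.
//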
// Shorts/Chars vector shift
21547 instruct vshiftS(vec dst, vec src, vec shift) %{
21548 predicate(!n->as_ShiftV()->is_var_shift());
21549 match(Set dst ( LShiftVS src shift));
21550 match(Set dst ( RShiftVS src shift));
21551 match(Set dst (URShiftVS src shift));
21552 effect(TEMP dst, USE src, USE shift);
21553 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21554 ins_encode %{
21555 int opcode = this->ideal_Opcode();
21556 if (UseAVX > 0) {
21557 int vlen_enc = vector_length_encoding(this);
21558 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21559 } else {
21560 int vlen = Matcher::vector_length(this);
21561 if (vlen == 2) {
21562 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21563 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21564 } else if (vlen == 4) {
21565 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21566 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21567 } else {
21568 assert (vlen == 8, "sanity");
21569 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21570 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21571 }
21572 }
21573 %}
21574 ins_pipe( pipe_slow );
21575 %}
21576
// Integers vector shift
21578 instruct vshiftI(vec dst, vec src, vec shift) %{
21579 predicate(!n->as_ShiftV()->is_var_shift());
21580 match(Set dst ( LShiftVI src shift));
21581 match(Set dst ( RShiftVI src shift));
21582 match(Set dst (URShiftVI src shift));
21583 effect(TEMP dst, USE src, USE shift);
21584 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21585 ins_encode %{
21586 int opcode = this->ideal_Opcode();
21587 if (UseAVX > 0) {
21588 int vlen_enc = vector_length_encoding(this);
21589 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21590 } else {
21591 int vlen = Matcher::vector_length(this);
21592 if (vlen == 2) {
21593 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21594 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21595 } else {
21596 assert(vlen == 4, "sanity");
21597 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21598 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21599 }
21600 }
21601 %}
21602 ins_pipe( pipe_slow );
21603 %}
21604
// Integers vector constant shift
21606 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21607 match(Set dst (LShiftVI src (LShiftCntV shift)));
21608 match(Set dst (RShiftVI src (RShiftCntV shift)));
21609 match(Set dst (URShiftVI src (RShiftCntV shift)));
21610 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21611 ins_encode %{
21612 int opcode = this->ideal_Opcode();
21613 if (UseAVX > 0) {
21614 int vector_len = vector_length_encoding(this);
21615 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21616 } else {
21617 int vlen = Matcher::vector_length(this);
21618 if (vlen == 2) {
21619 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21620 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21621 } else {
21622 assert(vlen == 4, "sanity");
21623 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21624 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21625 }
21626 }
21627 %}
21628 ins_pipe( pipe_slow );
21629 %}
21630
21631 // Longs vector shift
21632 instruct vshiftL(vec dst, vec src, vec shift) %{
21633 predicate(!n->as_ShiftV()->is_var_shift());
21634 match(Set dst ( LShiftVL src shift));
21635 match(Set dst (URShiftVL src shift));
21636 effect(TEMP dst, USE src, USE shift);
21637 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21638 ins_encode %{
21639 int opcode = this->ideal_Opcode();
21640 if (UseAVX > 0) {
21641 int vlen_enc = vector_length_encoding(this);
21642 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21643 } else {
21644 assert(Matcher::vector_length(this) == 2, "");
21645 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21646 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21647 }
21648 %}
21649 ins_pipe( pipe_slow );
21650 %}
21651
21652 // Longs vector constant shift
21653 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21654 match(Set dst (LShiftVL src (LShiftCntV shift)));
21655 match(Set dst (URShiftVL src (RShiftCntV shift)));
21656 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21657 ins_encode %{
21658 int opcode = this->ideal_Opcode();
21659 if (UseAVX > 0) {
21660 int vector_len = vector_length_encoding(this);
21661 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21662 } else {
21663 assert(Matcher::vector_length(this) == 2, "");
21664 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21665 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21666 }
21667 %}
21668 ins_pipe( pipe_slow );
21669 %}
21670
21671 // -------------------ArithmeticRightShift -----------------------------------
21672 // Long vector arithmetic right shift
21673 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21674 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21675 match(Set dst (RShiftVL src shift));
21676 effect(TEMP dst, TEMP tmp);
21677 format %{ "vshiftq $dst,$src,$shift" %}
21678 ins_encode %{
21679 uint vlen = Matcher::vector_length(this);
21680 if (vlen == 2) {
21681 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21682 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21683 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21684 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21685 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21686 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21687 } else {
21688 assert(vlen == 4, "sanity");
21689 assert(UseAVX > 1, "required");
21690 int vlen_enc = Assembler::AVX_256bit;
21691 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21692 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21693 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21694 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21695 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21696 }
21697 %}
21698 ins_pipe( pipe_slow );
21699 %}
21700
21701 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21702 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21703 match(Set dst (RShiftVL src shift));
21704 format %{ "vshiftq $dst,$src,$shift" %}
21705 ins_encode %{
21706 int vlen_enc = vector_length_encoding(this);
21707 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21708 %}
21709 ins_pipe( pipe_slow );
21710 %}
21711
21712 // ------------------- Variable Shift -----------------------------
21713 // Byte variable shift
21714 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21715 predicate(Matcher::vector_length(n) <= 8 &&
21716 n->as_ShiftV()->is_var_shift() &&
21717 !VM_Version::supports_avx512bw());
21718 match(Set dst ( LShiftVB src shift));
21719 match(Set dst ( RShiftVB src shift));
21720 match(Set dst (URShiftVB src shift));
21721 effect(TEMP dst, TEMP vtmp);
21722 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21723 ins_encode %{
21724 assert(UseAVX >= 2, "required");
21725
21726 int opcode = this->ideal_Opcode();
21727 int vlen_enc = Assembler::AVX_128bit;
21728 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21729 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21730 %}
21731 ins_pipe( pipe_slow );
21732 %}
21733
21734 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21735 predicate(Matcher::vector_length(n) == 16 &&
21736 n->as_ShiftV()->is_var_shift() &&
21737 !VM_Version::supports_avx512bw());
21738 match(Set dst ( LShiftVB src shift));
21739 match(Set dst ( RShiftVB src shift));
21740 match(Set dst (URShiftVB src shift));
21741 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21742 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21743 ins_encode %{
21744 assert(UseAVX >= 2, "required");
21745
21746 int opcode = this->ideal_Opcode();
21747 int vlen_enc = Assembler::AVX_128bit;
21748 // Shift lower half and get word result in dst
21749 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21750
21751 // Shift upper half and get word result in vtmp1
21752 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21753 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21754 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21755
21756 // Merge and down convert the two word results to byte in dst
21757 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21758 %}
21759 ins_pipe( pipe_slow );
21760 %}
21761
21762 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21763 predicate(Matcher::vector_length(n) == 32 &&
21764 n->as_ShiftV()->is_var_shift() &&
21765 !VM_Version::supports_avx512bw());
21766 match(Set dst ( LShiftVB src shift));
21767 match(Set dst ( RShiftVB src shift));
21768 match(Set dst (URShiftVB src shift));
21769 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21771 ins_encode %{
21772 assert(UseAVX >= 2, "required");
21773
21774 int opcode = this->ideal_Opcode();
21775 int vlen_enc = Assembler::AVX_128bit;
21776 // Process lower 128 bits and get result in dst
21777 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21778 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21779 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21780 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21781 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21782
21783 // Process higher 128 bits and get result in vtmp3
21784 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21785 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21786 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21787 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21788 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21789 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21790 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21791
21792 // Merge the two results in dst
21793 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21794 %}
21795 ins_pipe( pipe_slow );
21796 %}
21797
21798 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21799 predicate(Matcher::vector_length(n) <= 32 &&
21800 n->as_ShiftV()->is_var_shift() &&
21801 VM_Version::supports_avx512bw());
21802 match(Set dst ( LShiftVB src shift));
21803 match(Set dst ( RShiftVB src shift));
21804 match(Set dst (URShiftVB src shift));
21805 effect(TEMP dst, TEMP vtmp);
21806 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21807 ins_encode %{
21808 assert(UseAVX > 2, "required");
21809
21810 int opcode = this->ideal_Opcode();
21811 int vlen_enc = vector_length_encoding(this);
21812 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21813 %}
21814 ins_pipe( pipe_slow );
21815 %}
21816
21817 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21818 predicate(Matcher::vector_length(n) == 64 &&
21819 n->as_ShiftV()->is_var_shift() &&
21820 VM_Version::supports_avx512bw());
21821 match(Set dst ( LShiftVB src shift));
21822 match(Set dst ( RShiftVB src shift));
21823 match(Set dst (URShiftVB src shift));
21824 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21825 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21826 ins_encode %{
21827 assert(UseAVX > 2, "required");
21828
21829 int opcode = this->ideal_Opcode();
21830 int vlen_enc = Assembler::AVX_256bit;
21831 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21832 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21833 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21834 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21835 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21836 %}
21837 ins_pipe( pipe_slow );
21838 %}
21839
21840 // Short variable shift
21841 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21842 predicate(Matcher::vector_length(n) <= 8 &&
21843 n->as_ShiftV()->is_var_shift() &&
21844 !VM_Version::supports_avx512bw());
21845 match(Set dst ( LShiftVS src shift));
21846 match(Set dst ( RShiftVS src shift));
21847 match(Set dst (URShiftVS src shift));
21848 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21850 ins_encode %{
21851 assert(UseAVX >= 2, "required");
21852
21853 int opcode = this->ideal_Opcode();
21854 bool sign = (opcode != Op_URShiftVS);
21855 int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21858 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21859 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21860 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21861 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21862 %}
21863 ins_pipe( pipe_slow );
21864 %}
21865
21866 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21867 predicate(Matcher::vector_length(n) == 16 &&
21868 n->as_ShiftV()->is_var_shift() &&
21869 !VM_Version::supports_avx512bw());
21870 match(Set dst ( LShiftVS src shift));
21871 match(Set dst ( RShiftVS src shift));
21872 match(Set dst (URShiftVS src shift));
21873 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21875 ins_encode %{
21876 assert(UseAVX >= 2, "required");
21877
21878 int opcode = this->ideal_Opcode();
21879 bool sign = (opcode != Op_URShiftVS);
21880 int vlen_enc = Assembler::AVX_256bit;
21881 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21882 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21883 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21884 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21885 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21886
21887 // Shift upper half, with result in dst using vtmp1 as TEMP
21888 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21889 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21890 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21891 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21892 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21893 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21894
21895 // Merge lower and upper half result into dst
21896 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21897 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21898 %}
21899 ins_pipe( pipe_slow );
21900 %}
21901
21902 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21903 predicate(n->as_ShiftV()->is_var_shift() &&
21904 VM_Version::supports_avx512bw());
21905 match(Set dst ( LShiftVS src shift));
21906 match(Set dst ( RShiftVS src shift));
21907 match(Set dst (URShiftVS src shift));
21908 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21909 ins_encode %{
21910 assert(UseAVX > 2, "required");
21911
21912 int opcode = this->ideal_Opcode();
21913 int vlen_enc = vector_length_encoding(this);
21914 if (!VM_Version::supports_avx512vl()) {
21915 vlen_enc = Assembler::AVX_512bit;
21916 }
21917 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21918 %}
21919 ins_pipe( pipe_slow );
21920 %}
21921
// Integer variable shift
21923 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21924 predicate(n->as_ShiftV()->is_var_shift());
21925 match(Set dst ( LShiftVI src shift));
21926 match(Set dst ( RShiftVI src shift));
21927 match(Set dst (URShiftVI src shift));
21928 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21929 ins_encode %{
21930 assert(UseAVX >= 2, "required");
21931
21932 int opcode = this->ideal_Opcode();
21933 int vlen_enc = vector_length_encoding(this);
21934 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21935 %}
21936 ins_pipe( pipe_slow );
21937 %}
21938
// Long variable shift
21940 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21941 predicate(n->as_ShiftV()->is_var_shift());
21942 match(Set dst ( LShiftVL src shift));
21943 match(Set dst (URShiftVL src shift));
21944 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21945 ins_encode %{
21946 assert(UseAVX >= 2, "required");
21947
21948 int opcode = this->ideal_Opcode();
21949 int vlen_enc = vector_length_encoding(this);
21950 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21951 %}
21952 ins_pipe( pipe_slow );
21953 %}
21954
// Long variable arithmetic right shift
21956 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21957 predicate(Matcher::vector_length(n) <= 4 &&
21958 n->as_ShiftV()->is_var_shift() &&
21959 UseAVX == 2);
21960 match(Set dst (RShiftVL src shift));
21961 effect(TEMP dst, TEMP vtmp);
21962 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21963 ins_encode %{
21964 int opcode = this->ideal_Opcode();
21965 int vlen_enc = vector_length_encoding(this);
21966 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21967 $vtmp$$XMMRegister);
21968 %}
21969 ins_pipe( pipe_slow );
21970 %}
21971
21972 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21973 predicate(n->as_ShiftV()->is_var_shift() &&
21974 UseAVX > 2);
21975 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21977 ins_encode %{
21978 int opcode = this->ideal_Opcode();
21979 int vlen_enc = vector_length_encoding(this);
21980 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21981 %}
21982 ins_pipe( pipe_slow );
21983 %}
21984
21985 // --------------------------------- AND --------------------------------------
21986
21987 instruct vand(vec dst, vec src) %{
21988 predicate(UseAVX == 0);
21989 match(Set dst (AndV dst src));
21990 format %{ "pand $dst,$src\t! and vectors" %}
21991 ins_encode %{
21992 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21993 %}
21994 ins_pipe( pipe_slow );
21995 %}
21996
21997 instruct vand_reg(vec dst, vec src1, vec src2) %{
21998 predicate(UseAVX > 0);
21999 match(Set dst (AndV src1 src2));
22000 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
22001 ins_encode %{
22002 int vlen_enc = vector_length_encoding(this);
22003 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22004 %}
22005 ins_pipe( pipe_slow );
22006 %}
22007
22008 instruct vand_mem(vec dst, vec src, memory mem) %{
22009 predicate((UseAVX > 0) &&
22010 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22011 match(Set dst (AndV src (LoadVector mem)));
22012 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
22013 ins_encode %{
22014 int vlen_enc = vector_length_encoding(this);
22015 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22016 %}
22017 ins_pipe( pipe_slow );
22018 %}
22019
22020 // --------------------------------- OR ---------------------------------------
22021
22022 instruct vor(vec dst, vec src) %{
22023 predicate(UseAVX == 0);
22024 match(Set dst (OrV dst src));
22025 format %{ "por $dst,$src\t! or vectors" %}
22026 ins_encode %{
22027 __ por($dst$$XMMRegister, $src$$XMMRegister);
22028 %}
22029 ins_pipe( pipe_slow );
22030 %}
22031
22032 instruct vor_reg(vec dst, vec src1, vec src2) %{
22033 predicate(UseAVX > 0);
22034 match(Set dst (OrV src1 src2));
22035 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
22036 ins_encode %{
22037 int vlen_enc = vector_length_encoding(this);
22038 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22039 %}
22040 ins_pipe( pipe_slow );
22041 %}
22042
22043 instruct vor_mem(vec dst, vec src, memory mem) %{
22044 predicate((UseAVX > 0) &&
22045 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22046 match(Set dst (OrV src (LoadVector mem)));
22047 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
22048 ins_encode %{
22049 int vlen_enc = vector_length_encoding(this);
22050 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22051 %}
22052 ins_pipe( pipe_slow );
22053 %}
22054
22055 // --------------------------------- XOR --------------------------------------
22056
22057 instruct vxor(vec dst, vec src) %{
22058 predicate(UseAVX == 0);
22059 match(Set dst (XorV dst src));
22060 format %{ "pxor $dst,$src\t! xor vectors" %}
22061 ins_encode %{
22062 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22063 %}
22064 ins_pipe( pipe_slow );
22065 %}
22066
22067 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22068 predicate(UseAVX > 0);
22069 match(Set dst (XorV src1 src2));
22070 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
22071 ins_encode %{
22072 int vlen_enc = vector_length_encoding(this);
22073 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22074 %}
22075 ins_pipe( pipe_slow );
22076 %}
22077
22078 instruct vxor_mem(vec dst, vec src, memory mem) %{
22079 predicate((UseAVX > 0) &&
22080 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22081 match(Set dst (XorV src (LoadVector mem)));
22082 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
22083 ins_encode %{
22084 int vlen_enc = vector_length_encoding(this);
22085 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22086 %}
22087 ins_pipe( pipe_slow );
22088 %}
22089
22090 // --------------------------------- VectorCast --------------------------------------
22091
22092 instruct vcastBtoX(vec dst, vec src) %{
22093 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22094 match(Set dst (VectorCastB2X src));
22095 format %{ "vector_cast_b2x $dst,$src\t!" %}
22096 ins_encode %{
22097 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22098 int vlen_enc = vector_length_encoding(this);
22099 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22100 %}
22101 ins_pipe( pipe_slow );
22102 %}
22103
22104 instruct vcastBtoD(legVec dst, legVec src) %{
22105 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22106 match(Set dst (VectorCastB2X src));
22107 format %{ "vector_cast_b2x $dst,$src\t!" %}
22108 ins_encode %{
22109 int vlen_enc = vector_length_encoding(this);
22110 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22111 %}
22112 ins_pipe( pipe_slow );
22113 %}
22114
22115 instruct castStoX(vec dst, vec src) %{
22116 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22117 Matcher::vector_length(n->in(1)) <= 8 && // src
22118 Matcher::vector_element_basic_type(n) == T_BYTE);
22119 match(Set dst (VectorCastS2X src));
22120 format %{ "vector_cast_s2x $dst,$src" %}
22121 ins_encode %{
22122 assert(UseAVX > 0, "required");
22123
22124 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22125 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22126 %}
22127 ins_pipe( pipe_slow );
22128 %}
22129
22130 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22131 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22132 Matcher::vector_length(n->in(1)) == 16 && // src
22133 Matcher::vector_element_basic_type(n) == T_BYTE);
22134 effect(TEMP dst, TEMP vtmp);
22135 match(Set dst (VectorCastS2X src));
22136 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22137 ins_encode %{
22138 assert(UseAVX > 0, "required");
22139
22140 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22141 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22142 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22143 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22144 %}
22145 ins_pipe( pipe_slow );
22146 %}
22147
22148 instruct vcastStoX_evex(vec dst, vec src) %{
22149 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22150 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22151 match(Set dst (VectorCastS2X src));
22152 format %{ "vector_cast_s2x $dst,$src\t!" %}
22153 ins_encode %{
22154 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22155 int src_vlen_enc = vector_length_encoding(this, $src);
22156 int vlen_enc = vector_length_encoding(this);
22157 switch (to_elem_bt) {
22158 case T_BYTE:
22159 if (!VM_Version::supports_avx512vl()) {
22160 vlen_enc = Assembler::AVX_512bit;
22161 }
22162 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22163 break;
22164 case T_INT:
22165 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22166 break;
22167 case T_FLOAT:
22168 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22169 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22170 break;
22171 case T_LONG:
22172 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22173 break;
22174 case T_DOUBLE: {
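        // short -> double quadruples the lane size: widen short -> int at half the
        // destination width first, then convert int -> double at full destination width.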
22175 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22176 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22177 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22178 break;
22179 }
22180 default:
22181 ShouldNotReachHere();
22182 }
22183 %}
22184 ins_pipe( pipe_slow );
22185 %}
22186
22187 instruct castItoX(vec dst, vec src) %{
22188 predicate(UseAVX <= 2 &&
22189 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22190 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22191 match(Set dst (VectorCastI2X src));
22192 format %{ "vector_cast_i2x $dst,$src" %}
22193 ins_encode %{
22194 assert(UseAVX > 0, "required");
22195
22196 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22197 int vlen_enc = vector_length_encoding(this, $src);
22198
22199 if (to_elem_bt == T_BYTE) {
22200 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22201 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22202 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22203 } else {
22204 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22205 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22206 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22207 }
22208 %}
22209 ins_pipe( pipe_slow );
22210 %}
22211
22212 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22213 predicate(UseAVX <= 2 &&
22214 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22215 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22216 match(Set dst (VectorCastI2X src));
22217 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22218 effect(TEMP dst, TEMP vtmp);
22219 ins_encode %{
22220 assert(UseAVX > 0, "required");
22221
22222 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22223 int vlen_enc = vector_length_encoding(this, $src);
22224
22225 if (to_elem_bt == T_BYTE) {
22226 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22227 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22228 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22229 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22230 } else {
22231 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22232 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22233 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22234 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22235 }
22236 %}
22237 ins_pipe( pipe_slow );
22238 %}
22239
22240 instruct vcastItoX_evex(vec dst, vec src) %{
22241 predicate(UseAVX > 2 ||
22242 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22243 match(Set dst (VectorCastI2X src));
22244 format %{ "vector_cast_i2x $dst,$src\t!" %}
22245 ins_encode %{
22246 assert(UseAVX > 0, "required");
22247
22248 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22249 int src_vlen_enc = vector_length_encoding(this, $src);
22250 int dst_vlen_enc = vector_length_encoding(this);
22251 switch (dst_elem_bt) {
22252 case T_BYTE:
22253 if (!VM_Version::supports_avx512vl()) {
22254 src_vlen_enc = Assembler::AVX_512bit;
22255 }
22256 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22257 break;
22258 case T_SHORT:
22259 if (!VM_Version::supports_avx512vl()) {
22260 src_vlen_enc = Assembler::AVX_512bit;
22261 }
22262 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22263 break;
22264 case T_FLOAT:
22265 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22266 break;
22267 case T_LONG:
22268 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22269 break;
22270 case T_DOUBLE:
22271 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22272 break;
22273 default:
22274 ShouldNotReachHere();
22275 }
22276 %}
22277 ins_pipe( pipe_slow );
22278 %}
22279
22280 instruct vcastLtoBS(vec dst, vec src) %{
22281 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22282 UseAVX <= 2);
22283 match(Set dst (VectorCastL2X src));
22284 format %{ "vector_cast_l2x $dst,$src" %}
22285 ins_encode %{
22286 assert(UseAVX > 0, "required");
22287
22288 int vlen = Matcher::vector_length_in_bytes(this, $src);
22289 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22290 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22291 : ExternalAddress(vector_int_to_short_mask());
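    // imm8 0x8 = 0b00001000 selects elements {0, 2} into the two low positions, packing
    // the low dword of each qword lane toward the bottom before masking and narrowing.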
22292 if (vlen <= 16) {
22293 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22294 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22295 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22296 } else {
22297 assert(vlen <= 32, "required");
22298 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22299 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22300 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22301 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22302 }
22303 if (to_elem_bt == T_BYTE) {
22304 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22305 }
22306 %}
22307 ins_pipe( pipe_slow );
22308 %}
22309
22310 instruct vcastLtoX_evex(vec dst, vec src) %{
22311 predicate(UseAVX > 2 ||
22312 (Matcher::vector_element_basic_type(n) == T_INT ||
22313 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22314 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22315 match(Set dst (VectorCastL2X src));
22316 format %{ "vector_cast_l2x $dst,$src\t!" %}
22317 ins_encode %{
22318 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22319 int vlen = Matcher::vector_length_in_bytes(this, $src);
22320 int vlen_enc = vector_length_encoding(this, $src);
22321 switch (to_elem_bt) {
22322 case T_BYTE:
22323 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22324 vlen_enc = Assembler::AVX_512bit;
22325 }
22326 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22327 break;
22328 case T_SHORT:
22329 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22330 vlen_enc = Assembler::AVX_512bit;
22331 }
22332 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22333 break;
22334 case T_INT:
22335 if (vlen == 8) {
22336 if ($dst$$XMMRegister != $src$$XMMRegister) {
22337 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22338 }
22339 } else if (vlen == 16) {
22340 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22341 } else if (vlen == 32) {
22342 if (UseAVX > 2) {
22343 if (!VM_Version::supports_avx512vl()) {
22344 vlen_enc = Assembler::AVX_512bit;
22345 }
22346 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22347 } else {
22348 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22349 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22350 }
22351 } else { // vlen == 64
22352 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22353 }
22354 break;
22355 case T_FLOAT:
22356 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22357 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22358 break;
22359 case T_DOUBLE:
22360 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22361 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      default: assert(false, "%s", type2name(to_elem_bt));
22365 }
22366 %}
22367 ins_pipe( pipe_slow );
22368 %}
22369
22370 instruct vcastFtoD_reg(vec dst, vec src) %{
22371 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22372 match(Set dst (VectorCastF2X src));
22373 format %{ "vector_cast_f2d $dst,$src\t!" %}
22374 ins_encode %{
22375 int vlen_enc = vector_length_encoding(this);
22376 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22377 %}
22378 ins_pipe( pipe_slow );
22379 %}
22380
22381
22382 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22383 predicate(!VM_Version::supports_avx10_2() &&
22384 !VM_Version::supports_avx512vl() &&
22385 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22386 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22387 is_integral_type(Matcher::vector_element_basic_type(n)));
22388 match(Set dst (VectorCastF2X src));
22389 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22390 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22391 ins_encode %{
22392 int vlen_enc = vector_length_encoding(this, $src);
22393 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading addresses
    // wider than 32 bits for register-indirect addressing: stub constants live in the code
    // cache, and ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise
    // that limit, but a code cache larger than 2G is unrealistic in practice, and with the
    // cap in place we save a temporary register allocation, which in the limiting case can
    // prevent spilling in blocks with high register pressure.
22401 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22402 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22403 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22404 %}
22405 ins_pipe( pipe_slow );
22406 %}
22407
22408 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22409 predicate(!VM_Version::supports_avx10_2() &&
22410 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22411 is_integral_type(Matcher::vector_element_basic_type(n)));
22412 match(Set dst (VectorCastF2X src));
22413 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22414 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22415 ins_encode %{
22416 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22417 if (to_elem_bt == T_LONG) {
22418 int vlen_enc = vector_length_encoding(this);
22419 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22420 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22421 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22422 } else {
22423 int vlen_enc = vector_length_encoding(this, $src);
22424 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22425 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22426 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22427 }
22428 %}
22429 ins_pipe( pipe_slow );
22430 %}
22431
22432 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22433 predicate(VM_Version::supports_avx10_2() &&
22434 is_integral_type(Matcher::vector_element_basic_type(n)));
22435 match(Set dst (VectorCastF2X src));
22436 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22437 ins_encode %{
22438 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22439 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22440 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22441 %}
22442 ins_pipe( pipe_slow );
22443 %}
22444
22445 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22446 predicate(VM_Version::supports_avx10_2() &&
22447 is_integral_type(Matcher::vector_element_basic_type(n)));
22448 match(Set dst (VectorCastF2X (LoadVector src)));
22449 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22450 ins_encode %{
22451 int vlen = Matcher::vector_length(this);
22452 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22453 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22454 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22455 %}
22456 ins_pipe( pipe_slow );
22457 %}
22458
22459 instruct vcastDtoF_reg(vec dst, vec src) %{
22460 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22461 match(Set dst (VectorCastD2X src));
22462 format %{ "vector_cast_d2x $dst,$src\t!" %}
22463 ins_encode %{
22464 int vlen_enc = vector_length_encoding(this, $src);
22465 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22466 %}
22467 ins_pipe( pipe_slow );
22468 %}
22469
22470 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22471 predicate(!VM_Version::supports_avx10_2() &&
22472 !VM_Version::supports_avx512vl() &&
22473 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22474 is_integral_type(Matcher::vector_element_basic_type(n)));
22475 match(Set dst (VectorCastD2X src));
22476 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22477 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22478 ins_encode %{
22479 int vlen_enc = vector_length_encoding(this, $src);
22480 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22481 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22482 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22483 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22484 %}
22485 ins_pipe( pipe_slow );
22486 %}
22487
22488 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22489 predicate(!VM_Version::supports_avx10_2() &&
22490 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22491 is_integral_type(Matcher::vector_element_basic_type(n)));
22492 match(Set dst (VectorCastD2X src));
22493 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22494 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22495 ins_encode %{
22496 int vlen_enc = vector_length_encoding(this, $src);
22497 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22498 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22499 ExternalAddress(vector_float_signflip());
22500 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22501 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22502 %}
22503 ins_pipe( pipe_slow );
22504 %}
22505
22506 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22507 predicate(VM_Version::supports_avx10_2() &&
22508 is_integral_type(Matcher::vector_element_basic_type(n)));
22509 match(Set dst (VectorCastD2X src));
22510 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22511 ins_encode %{
22512 int vlen_enc = vector_length_encoding(this, $src);
22513 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22514 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22515 %}
22516 ins_pipe( pipe_slow );
22517 %}
22518
22519 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22520 predicate(VM_Version::supports_avx10_2() &&
22521 is_integral_type(Matcher::vector_element_basic_type(n)));
22522 match(Set dst (VectorCastD2X (LoadVector src)));
22523 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22524 ins_encode %{
22525 int vlen = Matcher::vector_length(this);
22526 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22527 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22528 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22529 %}
22530 ins_pipe( pipe_slow );
22531 %}
22532
22533 instruct vucast(vec dst, vec src) %{
22534 match(Set dst (VectorUCastB2X src));
22535 match(Set dst (VectorUCastS2X src));
22536 match(Set dst (VectorUCastI2X src));
22537 format %{ "vector_ucast $dst,$src\t!" %}
22538 ins_encode %{
22539 assert(UseAVX > 0, "required");
22540
22541 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22542 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22543 int vlen_enc = vector_length_encoding(this);
22544 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22545 %}
22546 ins_pipe( pipe_slow );
22547 %}
22548
22549 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22550 predicate(!VM_Version::supports_avx512vl() &&
22551 Matcher::vector_length_in_bytes(n) < 64 &&
22552 Matcher::vector_element_basic_type(n) == T_INT);
22553 match(Set dst (RoundVF src));
22554 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22555 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22556 ins_encode %{
22557 int vlen_enc = vector_length_encoding(this);
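    // MXCSR images: the hardware default control word is 0x1F80 (all exceptions masked,
    // RC = round-to-nearest). 0x3F80 switches RC to round-down, which lets round(x) be
    // computed via a conversion of x + 0.5. 0x3FBF additionally pre-sets the six sticky
    // exception-flag bits; presumably this spares E-cores a costly first flag-setting
    // assist, hence the EnableX86ECoreOpts guard.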
22558 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22559 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22560 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22561 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22562 %}
22563 ins_pipe( pipe_slow );
22564 %}
22565
22566 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22567 predicate((VM_Version::supports_avx512vl() ||
22568 Matcher::vector_length_in_bytes(n) == 64) &&
22569 Matcher::vector_element_basic_type(n) == T_INT);
22570 match(Set dst (RoundVF src));
22571 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22572 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22573 ins_encode %{
22574 int vlen_enc = vector_length_encoding(this);
22575 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22576 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22577 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22578 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22579 %}
22580 ins_pipe( pipe_slow );
22581 %}
22582
22583 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22584 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22585 match(Set dst (RoundVD src));
22586 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22587 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22588 ins_encode %{
22589 int vlen_enc = vector_length_encoding(this);
22590 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22591 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22592 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22593 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22594 %}
22595 ins_pipe( pipe_slow );
22596 %}
22597
22598 // --------------------------------- VectorMaskCmp --------------------------------------
22599
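// Three lowering strategies, keyed on the mask representation:
//  - legacy AVX forms materialize the result as a vector of per-lane
//    all-ones/all-zeros values,
//  - 512-bit forms without a vectmask type compare into a temporary kRegister
//    and then expand it back into a vector, and
//  - EVEX forms whose result type is a vectmask compare directly into the
//    destination kRegister.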
22600 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22601 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22602 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22603 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22604 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22605 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22606 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22607 ins_encode %{
22608 int vlen_enc = vector_length_encoding(this, $src1);
22609 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22610 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22611 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22612 } else {
22613 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22614 }
22615 %}
22616 ins_pipe( pipe_slow );
22617 %}
22618
22619 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22620 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22621 n->bottom_type()->isa_vectmask() == nullptr &&
22622 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22623 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22624 effect(TEMP ktmp);
22625 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22626 ins_encode %{
22627 int vlen_enc = Assembler::AVX_512bit;
22628 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22629 KRegister mask = k0; // The comparison itself is not being masked.
22630 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22631 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22632 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22633 } else {
22634 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22635 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22636 }
22637 %}
22638 ins_pipe( pipe_slow );
22639 %}
22640
22641 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22642 predicate(n->bottom_type()->isa_vectmask() &&
22643 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22644 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22645 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22646 ins_encode %{
22647 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22648 int vlen_enc = vector_length_encoding(this, $src1);
22649 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22650 KRegister mask = k0; // The comparison itself is not being masked.
22651 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22652 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22653 } else {
22654 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22655 }
22656 %}
22657 ins_pipe( pipe_slow );
22658 %}
22659
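// eq/lt/gt have a native packed-compare encoding and are emitted directly;
// ne/le/ge (below) are synthesized by emitting the complementary compare and
// negating the result, for which vpcmpCCW needs $xtmp as scratch.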
22660 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22661 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22662 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22663 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22664 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22665 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22666 (n->in(2)->get_int() == BoolTest::eq ||
22667 n->in(2)->get_int() == BoolTest::lt ||
22668 n->in(2)->get_int() == BoolTest::gt)); // cond
22669 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22670 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22671 ins_encode %{
22672 int vlen_enc = vector_length_encoding(this, $src1);
22673 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22674 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22675 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22676 %}
22677 ins_pipe( pipe_slow );
22678 %}
22679
22680 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22681 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22682 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22683 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22684 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22685 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22686 (n->in(2)->get_int() == BoolTest::ne ||
22687 n->in(2)->get_int() == BoolTest::le ||
22688 n->in(2)->get_int() == BoolTest::ge)); // cond
22689 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22690 effect(TEMP dst, TEMP xtmp);
22691 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22692 ins_encode %{
22693 int vlen_enc = vector_length_encoding(this, $src1);
22694 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22695 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22696 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22697 %}
22698 ins_pipe( pipe_slow );
22699 %}
22700
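// Unsigned compares have no legacy SSE/AVX encoding. Both operands are biased
// by XORing in the element sign bit (high_bit_set), which maps unsigned order
// onto signed order, so the signed compare sequence can be reused.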
22701 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22702 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22703 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22704 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22705 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22706 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22707 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22708 effect(TEMP dst, TEMP xtmp);
22709 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22710 ins_encode %{
22711 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22712 int vlen_enc = vector_length_encoding(this, $src1);
22713 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22714 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22715
22716 if (vlen_enc == Assembler::AVX_128bit) {
22717 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22718 } else {
22719 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22720 }
22721 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22722 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22723 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22724 %}
22725 ins_pipe( pipe_slow );
22726 %}
22727
22728 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22729 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22730 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22731 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22732 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22733 effect(TEMP ktmp);
22734 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22735 ins_encode %{
22736 assert(UseAVX > 2, "required");
22737
22738 int vlen_enc = vector_length_encoding(this, $src1);
22739 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22740 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22741 KRegister mask = k0; // The comparison itself is not being masked.
22742 bool merge = false;
22743 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22744
22745 switch (src1_elem_bt) {
22746 case T_INT: {
22747 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22748 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22749 break;
22750 }
22751 case T_LONG: {
22752 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22753 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22754 break;
22755 }
22756 default: assert(false, "%s", type2name(src1_elem_bt));
22757 }
22758 %}
22759 ins_pipe( pipe_slow );
22760 %}
22761
22762
22763 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22764 predicate(n->bottom_type()->isa_vectmask() &&
22765 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22766 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22767 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22768 ins_encode %{
22769 assert(UseAVX > 2, "required");
22770 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22771
22772 int vlen_enc = vector_length_encoding(this, $src1);
22773 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22774 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22775 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22776
    // The comparison itself is not masked (k0); the result is written directly
    // into the destination mask register.
22778 switch (src1_elem_bt) {
22779 case T_BYTE: {
22780 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22781 break;
22782 }
22783 case T_SHORT: {
22784 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22785 break;
22786 }
22787 case T_INT: {
22788 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22789 break;
22790 }
22791 case T_LONG: {
22792 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22793 break;
22794 }
22795 default: assert(false, "%s", type2name(src1_elem_bt));
22796 }
22797 %}
22798 ins_pipe( pipe_slow );
22799 %}
22800
22801 // Extract
22802
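// Vectors of up to 16 bytes are extracted directly with get_elem. For wider
// vectors, get_lane first isolates the 128-bit lane holding the element into a
// temporary register, and get_elem then extracts from within that lane.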
22803 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22804 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22805 match(Set dst (ExtractI src idx));
22806 match(Set dst (ExtractS src idx));
22807 match(Set dst (ExtractB src idx));
22808 format %{ "extractI $dst,$src,$idx\t!" %}
22809 ins_encode %{
22810 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22811
22812 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22813 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22814 %}
22815 ins_pipe( pipe_slow );
22816 %}
22817
22818 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22819 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22820 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22821 match(Set dst (ExtractI src idx));
22822 match(Set dst (ExtractS src idx));
22823 match(Set dst (ExtractB src idx));
22824 effect(TEMP vtmp);
22825 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22826 ins_encode %{
22827 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22828
22829 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22830 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22831 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22832 %}
22833 ins_pipe( pipe_slow );
22834 %}
22835
22836 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22837 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22838 match(Set dst (ExtractL src idx));
22839 format %{ "extractL $dst,$src,$idx\t!" %}
22840 ins_encode %{
22841 assert(UseSSE >= 4, "required");
22842 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22843
22844 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22845 %}
22846 ins_pipe( pipe_slow );
22847 %}
22848
22849 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22850 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22851 Matcher::vector_length(n->in(1)) == 8); // src
22852 match(Set dst (ExtractL src idx));
22853 effect(TEMP vtmp);
22854 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22855 ins_encode %{
22856 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22857
22858 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22859 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22860 %}
22861 ins_pipe( pipe_slow );
22862 %}
22863
22864 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22865 predicate(Matcher::vector_length(n->in(1)) <= 4);
22866 match(Set dst (ExtractF src idx));
22867 effect(TEMP dst, TEMP vtmp);
22868 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22869 ins_encode %{
22870 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22871
22872 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22873 %}
22874 ins_pipe( pipe_slow );
22875 %}
22876
22877 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22878 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22879 Matcher::vector_length(n->in(1)/*src*/) == 16);
22880 match(Set dst (ExtractF src idx));
22881 effect(TEMP vtmp);
22882 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22883 ins_encode %{
22884 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22885
22886 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22887 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22888 %}
22889 ins_pipe( pipe_slow );
22890 %}
22891
22892 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22893 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22894 match(Set dst (ExtractD src idx));
22895 format %{ "extractD $dst,$src,$idx\t!" %}
22896 ins_encode %{
22897 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22898
22899 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22900 %}
22901 ins_pipe( pipe_slow );
22902 %}
22903
22904 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22905 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22906 Matcher::vector_length(n->in(1)) == 8); // src
22907 match(Set dst (ExtractD src idx));
22908 effect(TEMP vtmp);
22909 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22910 ins_encode %{
22911 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22912
22913 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22914 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22915 %}
22916 ins_pipe( pipe_slow );
22917 %}
22918
22919 // --------------------------------- Vector Blend --------------------------------------
22920
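// The SSE4.1 blend (pblendvb) reads its mask implicitly from xmm0, hence the
// rxmm0 temp in the non-AVX rule. AVX provides explicit three-operand variable
// blends; with EnableX86ECoreOpts the blend is instead synthesized from
// andn/and/or, avoiding variable-blend instructions that are comparatively
// slow on E-cores. 512-bit and masked blends go through kRegister-based
// evpblend.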
22921 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22922 predicate(UseAVX == 0);
22923 match(Set dst (VectorBlend (Binary dst src) mask));
22924 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22925 effect(TEMP tmp);
22926 ins_encode %{
22927 assert(UseSSE >= 4, "required");
22928
22929 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22930 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22931 }
22932 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22933 %}
22934 ins_pipe( pipe_slow );
22935 %}
22936
22937 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22938 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22939 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22940 Matcher::vector_length_in_bytes(n) <= 32 &&
22941 is_integral_type(Matcher::vector_element_basic_type(n)));
22942 match(Set dst (VectorBlend (Binary src1 src2) mask));
22943 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22944 ins_encode %{
22945 int vlen_enc = vector_length_encoding(this);
22946 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22947 %}
22948 ins_pipe( pipe_slow );
22949 %}
22950
22951 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22952 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22953 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22954 Matcher::vector_length_in_bytes(n) <= 32 &&
22955 !is_integral_type(Matcher::vector_element_basic_type(n)));
22956 match(Set dst (VectorBlend (Binary src1 src2) mask));
22957 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22958 ins_encode %{
22959 int vlen_enc = vector_length_encoding(this);
22960 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22961 %}
22962 ins_pipe( pipe_slow );
22963 %}
22964
22965 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22966 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22967 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22968 Matcher::vector_length_in_bytes(n) <= 32);
22969 match(Set dst (VectorBlend (Binary src1 src2) mask));
22970 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22971 effect(TEMP vtmp, TEMP dst);
22972 ins_encode %{
22973 int vlen_enc = vector_length_encoding(this);
22974 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22975 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22976 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22977 %}
22978 ins_pipe( pipe_slow );
22979 %}
22980
22981 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22982 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22983 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22984 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22986 effect(TEMP ktmp);
22987 ins_encode %{
22988 int vlen_enc = Assembler::AVX_512bit;
22989 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22990 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22991 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22992 %}
22993 ins_pipe( pipe_slow );
22994 %}
22995
22996
22997 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22998 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22999 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23000 VM_Version::supports_avx512bw()));
23001 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23003 ins_encode %{
23004 int vlen_enc = vector_length_encoding(this);
23005 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23006 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23007 %}
23008 ins_pipe( pipe_slow );
23009 %}
23010
23011 // --------------------------------- ABS --------------------------------------
23012 // a = |a|
23013 instruct vabsB_reg(vec dst, vec src) %{
23014 match(Set dst (AbsVB src));
23015 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23016 ins_encode %{
23017 uint vlen = Matcher::vector_length(this);
23018 if (vlen <= 16) {
23019 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23020 } else {
23021 int vlen_enc = vector_length_encoding(this);
23022 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23023 }
23024 %}
23025 ins_pipe( pipe_slow );
23026 %}
23027
23028 instruct vabsS_reg(vec dst, vec src) %{
23029 match(Set dst (AbsVS src));
23030 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23031 ins_encode %{
23032 uint vlen = Matcher::vector_length(this);
23033 if (vlen <= 8) {
23034 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23035 } else {
23036 int vlen_enc = vector_length_encoding(this);
23037 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23038 }
23039 %}
23040 ins_pipe( pipe_slow );
23041 %}
23042
23043 instruct vabsI_reg(vec dst, vec src) %{
23044 match(Set dst (AbsVI src));
23045 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23046 ins_encode %{
23047 uint vlen = Matcher::vector_length(this);
23048 if (vlen <= 4) {
23049 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23050 } else {
23051 int vlen_enc = vector_length_encoding(this);
23052 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23053 }
23054 %}
23055 ins_pipe( pipe_slow );
23056 %}
23057
23058 instruct vabsL_reg(vec dst, vec src) %{
23059 match(Set dst (AbsVL src));
23060 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23061 ins_encode %{
23062 assert(UseAVX > 2, "required");
23063 int vlen_enc = vector_length_encoding(this);
23064 if (!VM_Version::supports_avx512vl()) {
23065 vlen_enc = Assembler::AVX_512bit;
23066 }
23067 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23068 %}
23069 ins_pipe( pipe_slow );
23070 %}
23071
23072 // --------------------------------- ABSNEG --------------------------------------
23073
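// AbsV and NegV share one rule per element type: the ideal opcode selects
// whether the macro assembler clears or flips the sign bit with a mask constant.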
23074 instruct vabsnegF(vec dst, vec src) %{
23075 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23076 match(Set dst (AbsVF src));
23077 match(Set dst (NegVF src));
23078 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23079 ins_cost(150);
23080 ins_encode %{
23081 int opcode = this->ideal_Opcode();
23082 int vlen = Matcher::vector_length(this);
23083 if (vlen == 2) {
23084 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23085 } else {
23086 assert(vlen == 8 || vlen == 16, "required");
23087 int vlen_enc = vector_length_encoding(this);
23088 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23089 }
23090 %}
23091 ins_pipe( pipe_slow );
23092 %}
23093
23094 instruct vabsneg4F(vec dst) %{
23095 predicate(Matcher::vector_length(n) == 4);
23096 match(Set dst (AbsVF dst));
23097 match(Set dst (NegVF dst));
23098 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23099 ins_cost(150);
23100 ins_encode %{
23101 int opcode = this->ideal_Opcode();
23102 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23103 %}
23104 ins_pipe( pipe_slow );
23105 %}
23106
23107 instruct vabsnegD(vec dst, vec src) %{
23108 match(Set dst (AbsVD src));
23109 match(Set dst (NegVD src));
23110 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23111 ins_encode %{
23112 int opcode = this->ideal_Opcode();
23113 uint vlen = Matcher::vector_length(this);
23114 if (vlen == 2) {
23115 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23116 } else {
23117 int vlen_enc = vector_length_encoding(this);
23118 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23119 }
23120 %}
23121 ins_pipe( pipe_slow );
23122 %}
23123
23124 //------------------------------------- VectorTest --------------------------------------------
23125
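// vptest-based rules set the flags directly from vector registers. The
// kRegister rules for masks shorter than 8 bits cannot use kortest alone (it
// operates on 8/16/32/64-bit mask granularities), so the mask is moved to a
// GPR and the undefined upper bits are cleared with (1 << masklen) - 1 before
// testing.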
23126 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23127 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23128 match(Set cr (VectorTest src1 src2));
23129 effect(TEMP vtmp);
23130 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23131 ins_encode %{
23132 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23133 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23134 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23135 %}
23136 ins_pipe( pipe_slow );
23137 %}
23138
23139 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23140 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23141 match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\t!" %}
23143 ins_encode %{
23144 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23145 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23146 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23147 %}
23148 ins_pipe( pipe_slow );
23149 %}
23150
23151 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23152 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23153 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23154 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23155 match(Set cr (VectorTest src1 src2));
23156 effect(TEMP tmp);
23157 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23158 ins_encode %{
23159 uint masklen = Matcher::vector_length(this, $src1);
23160 __ kmovwl($tmp$$Register, $src1$$KRegister);
23161 __ andl($tmp$$Register, (1 << masklen) - 1);
23162 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23163 %}
23164 ins_pipe( pipe_slow );
23165 %}
23166
23167 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23168 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23169 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23170 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23171 match(Set cr (VectorTest src1 src2));
23172 effect(TEMP tmp);
23173 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23174 ins_encode %{
23175 uint masklen = Matcher::vector_length(this, $src1);
23176 __ kmovwl($tmp$$Register, $src1$$KRegister);
23177 __ andl($tmp$$Register, (1 << masklen) - 1);
23178 %}
23179 ins_pipe( pipe_slow );
23180 %}
23181
23182 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23183 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23184 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23185 match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\t!" %}
23187 ins_encode %{
23188 uint masklen = Matcher::vector_length(this, $src1);
23189 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23190 %}
23191 ins_pipe( pipe_slow );
23192 %}
23193
23194 //------------------------------------- LoadMask --------------------------------------------
23195
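// VectorLoadMask widens a byte vector of 0/1 values into a lane-sized 0/-1
// mask, either as a vector (legacy) or directly into a kRegister on EVEX
// targets.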
23196 instruct loadMask(legVec dst, legVec src) %{
23197 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23198 match(Set dst (VectorLoadMask src));
23199 effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\t!" %}
23201 ins_encode %{
23202 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23203 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23204 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23205 %}
23206 ins_pipe( pipe_slow );
23207 %}
23208
23209 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23210 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23211 match(Set dst (VectorLoadMask src));
23212 effect(TEMP xtmp);
23213 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23214 ins_encode %{
23215 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23216 true, Assembler::AVX_512bit);
23217 %}
23218 ins_pipe( pipe_slow );
23219 %}
23220
23221 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23222 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23223 match(Set dst (VectorLoadMask src));
23224 effect(TEMP xtmp);
23225 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23226 ins_encode %{
23227 int vlen_enc = vector_length_encoding(in(1));
23228 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23229 false, vlen_enc);
23230 %}
23231 ins_pipe( pipe_slow );
23232 %}
23233
23234 //------------------------------------- StoreMask --------------------------------------------
23235
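// VectorStoreMask is the inverse of VectorLoadMask: it narrows lane-sized 0/-1
// masks into a byte vector of 0/1 values. The trailing pabsb maps 0xFF to 0x01.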
23236 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23237 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23238 match(Set dst (VectorStoreMask src size));
23239 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23240 ins_encode %{
23241 int vlen = Matcher::vector_length(this);
23242 if (vlen <= 16 && UseAVX <= 2) {
23243 assert(UseSSE >= 3, "required");
23244 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23245 } else {
23246 assert(UseAVX > 0, "required");
23247 int src_vlen_enc = vector_length_encoding(this, $src);
23248 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23249 }
23250 %}
23251 ins_pipe( pipe_slow );
23252 %}
23253
23254 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23255 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23256 match(Set dst (VectorStoreMask src size));
23257 effect(TEMP_DEF dst, TEMP xtmp);
23258 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23259 ins_encode %{
23260 int vlen_enc = Assembler::AVX_128bit;
23261 int vlen = Matcher::vector_length(this);
23262 if (vlen <= 8) {
23263 assert(UseSSE >= 3, "required");
23264 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23265 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23266 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23267 } else {
23268 assert(UseAVX > 0, "required");
23269 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23270 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23271 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23272 }
23273 %}
23274 ins_pipe( pipe_slow );
23275 %}
23276
23277 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23278 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23279 match(Set dst (VectorStoreMask src size));
23280 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23281 effect(TEMP_DEF dst, TEMP xtmp);
23282 ins_encode %{
23283 int vlen_enc = Assembler::AVX_128bit;
23284 int vlen = Matcher::vector_length(this);
23285 if (vlen <= 4) {
23286 assert(UseSSE >= 3, "required");
23287 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23288 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23289 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23290 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23291 } else {
23292 assert(UseAVX > 0, "required");
23293 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23294 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23295 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23296 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23297 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23298 }
23299 %}
23300 ins_pipe( pipe_slow );
23301 %}
23302
23303 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23304 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23305 match(Set dst (VectorStoreMask src size));
23306 effect(TEMP_DEF dst, TEMP xtmp);
23307 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23308 ins_encode %{
23309 assert(UseSSE >= 3, "required");
23310 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23311 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23312 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23313 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23314 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23315 %}
23316 ins_pipe( pipe_slow );
23317 %}
23318
23319 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23320 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23321 match(Set dst (VectorStoreMask src size));
23322 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23323 effect(TEMP_DEF dst, TEMP vtmp);
23324 ins_encode %{
23325 int vlen_enc = Assembler::AVX_128bit;
23326 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23327 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23328 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23329 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23330 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23331 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23332 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23333 %}
23334 ins_pipe( pipe_slow );
23335 %}
23336
23337 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23338 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23339 match(Set dst (VectorStoreMask src size));
23340 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23341 ins_encode %{
23342 int src_vlen_enc = vector_length_encoding(this, $src);
23343 int dst_vlen_enc = vector_length_encoding(this);
23344 if (!VM_Version::supports_avx512vl()) {
23345 src_vlen_enc = Assembler::AVX_512bit;
23346 }
23347 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23348 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23349 %}
23350 ins_pipe( pipe_slow );
23351 %}
23352
23353 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23354 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23355 match(Set dst (VectorStoreMask src size));
23356 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23357 ins_encode %{
23358 int src_vlen_enc = vector_length_encoding(this, $src);
23359 int dst_vlen_enc = vector_length_encoding(this);
23360 if (!VM_Version::supports_avx512vl()) {
23361 src_vlen_enc = Assembler::AVX_512bit;
23362 }
23363 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23364 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23365 %}
23366 ins_pipe( pipe_slow );
23367 %}
23368
23369 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23370 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23371 match(Set dst (VectorStoreMask mask size));
23372 effect(TEMP_DEF dst);
23373 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23374 ins_encode %{
23375 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23376 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23377 false, Assembler::AVX_512bit, noreg);
23378 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23379 %}
23380 ins_pipe( pipe_slow );
23381 %}
23382
23383 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23384 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23385 match(Set dst (VectorStoreMask mask size));
23386 effect(TEMP_DEF dst);
23387 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23388 ins_encode %{
23389 int dst_vlen_enc = vector_length_encoding(this);
23390 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23391 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23392 %}
23393 ins_pipe( pipe_slow );
23394 %}
23395
23396 instruct vmaskcast_evex(kReg dst) %{
23397 match(Set dst (VectorMaskCast dst));
23398 ins_cost(0);
23399 format %{ "vector_mask_cast $dst" %}
23400 ins_encode %{
23401 // empty
23402 %}
23403 ins_pipe(empty);
23404 %}
23405
23406 instruct vmaskcast(vec dst) %{
23407 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23408 match(Set dst (VectorMaskCast dst));
23409 ins_cost(0);
23410 format %{ "vector_mask_cast $dst" %}
23411 ins_encode %{
23412 // empty
23413 %}
23414 ins_pipe(empty);
23415 %}
23416
23417 instruct vmaskcast_avx(vec dst, vec src) %{
23418 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23419 match(Set dst (VectorMaskCast src));
23420 format %{ "vector_mask_cast $dst, $src" %}
23421 ins_encode %{
23422 int vlen = Matcher::vector_length(this);
23423 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23424 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23425 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23426 %}
23427 ins_pipe(pipe_slow);
23428 %}
23429
23430 //-------------------------------- Load Iota Indices ----------------------------------
23431
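// The iota sequence 0,1,2,... is loaded from a constant table. PopulateIndex
// builds an index vector as broadcast(start) + iota; the stride is restricted
// to 1 by the matched immI_1 operand.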
23432 instruct loadIotaIndices(vec dst, immI_0 src) %{
23433 match(Set dst (VectorLoadConst src));
23434 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23435 ins_encode %{
23436 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23437 BasicType bt = Matcher::vector_element_basic_type(this);
23438 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23439 %}
23440 ins_pipe( pipe_slow );
23441 %}
23442
23443 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23444 match(Set dst (PopulateIndex src1 src2));
23445 effect(TEMP dst, TEMP vtmp);
23446 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23447 ins_encode %{
23448 assert($src2$$constant == 1, "required");
23449 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23450 int vlen_enc = vector_length_encoding(this);
23451 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23452 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23453 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23454 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23455 %}
23456 ins_pipe( pipe_slow );
23457 %}
23458
23459 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23460 match(Set dst (PopulateIndex src1 src2));
23461 effect(TEMP dst, TEMP vtmp);
23462 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23463 ins_encode %{
23464 assert($src2$$constant == 1, "required");
23465 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23466 int vlen_enc = vector_length_encoding(this);
23467 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23468 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23469 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23470 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23471 %}
23472 ins_pipe( pipe_slow );
23473 %}
23474
23475 //-------------------------------- Rearrange ----------------------------------
23476
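// In-register shuffles. Within a single 128-bit lane a plain pshufb suffices;
// 256-bit byte/short shuffles without VBMI additionally shuffle a lane-swapped
// copy of the source and blend the two results on an is-other-lane mask,
// because vpshufb cannot move bytes across 128-bit lanes.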
23477 // LoadShuffle/Rearrange for Byte
23478 instruct rearrangeB(vec dst, vec shuffle) %{
23479 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23480 Matcher::vector_length(n) < 32);
23481 match(Set dst (VectorRearrange dst shuffle));
23482 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23483 ins_encode %{
23484 assert(UseSSE >= 4, "required");
23485 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23486 %}
23487 ins_pipe( pipe_slow );
23488 %}
23489
23490 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23491 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23492 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23493 match(Set dst (VectorRearrange src shuffle));
23494 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23495 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23496 ins_encode %{
23497 assert(UseAVX >= 2, "required");
23498 // Swap src into vtmp1
23499 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23500 // Shuffle swapped src to get entries from other 128 bit lane
23501 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23502 // Shuffle original src to get entries from self 128 bit lane
23503 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23504 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23505 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23506 // Perform the blend
23507 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23508 %}
23509 ins_pipe( pipe_slow );
23510 %}
23511
23512
23513 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23514 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23515 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23516 match(Set dst (VectorRearrange src shuffle));
23517 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23519 ins_encode %{
23520 int vlen_enc = vector_length_encoding(this);
23521 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23522 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23523 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23524 %}
23525 ins_pipe( pipe_slow );
23526 %}
23527
23528 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23529 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23530 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23531 match(Set dst (VectorRearrange src shuffle));
23532 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23533 ins_encode %{
23534 int vlen_enc = vector_length_encoding(this);
23535 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23536 %}
23537 ins_pipe( pipe_slow );
23538 %}
23539
23540 // LoadShuffle/Rearrange for Short
23541
23542 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23543 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23544 !VM_Version::supports_avx512bw());
23545 match(Set dst (VectorLoadShuffle src));
23546 effect(TEMP dst, TEMP vtmp);
23547 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23548 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23551 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23552 if (UseAVX == 0) {
23553 assert(vlen_in_bytes <= 16, "required");
23554 // Multiply each shuffle by two to get byte index
23555 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23556 __ psllw($vtmp$$XMMRegister, 1);
23557
23558 // Duplicate to create 2 copies of byte index
23559 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23560 __ psllw($dst$$XMMRegister, 8);
23561 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23562
23563 // Add one to get alternate byte index
23564 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23565 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23566 } else {
23567 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23568 int vlen_enc = vector_length_encoding(this);
23569 // Multiply each shuffle by two to get byte index
23570 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23571
23572 // Duplicate to create 2 copies of byte index
23573 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23574 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23575
23576 // Add one to get alternate byte index
23577 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23578 }
23579 %}
23580 ins_pipe( pipe_slow );
23581 %}
23582
23583 instruct rearrangeS(vec dst, vec shuffle) %{
23584 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23585 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23586 match(Set dst (VectorRearrange dst shuffle));
23587 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23588 ins_encode %{
23589 assert(UseSSE >= 4, "required");
23590 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23591 %}
23592 ins_pipe( pipe_slow );
23593 %}
23594
23595 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23596 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23597 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23598 match(Set dst (VectorRearrange src shuffle));
23599 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23600 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23601 ins_encode %{
23602 assert(UseAVX >= 2, "required");
23603 // Swap src into vtmp1
23604 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23605 // Shuffle swapped src to get entries from other 128 bit lane
23606 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23607 // Shuffle original src to get entries from self 128 bit lane
23608 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23609 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23610 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23611 // Perform the blend
23612 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23613 %}
23614 ins_pipe( pipe_slow );
23615 %}
23616
23617 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23618 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23619 VM_Version::supports_avx512bw());
23620 match(Set dst (VectorRearrange src shuffle));
23621 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23622 ins_encode %{
23623 int vlen_enc = vector_length_encoding(this);
23624 if (!VM_Version::supports_avx512vl()) {
23625 vlen_enc = Assembler::AVX_512bit;
23626 }
23627 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23628 %}
23629 ins_pipe( pipe_slow );
23630 %}
23631
23632 // LoadShuffle/Rearrange for Integer and Float
23633
23634 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23635 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23636 Matcher::vector_length(n) == 4 && UseAVX == 0);
23637 match(Set dst (VectorLoadShuffle src));
23638 effect(TEMP dst, TEMP vtmp);
23639 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23640 ins_encode %{
23641 assert(UseSSE >= 4, "required");
23642
    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23645
23646 // Duplicate and multiply each shuffle by 4
23647 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23648 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23649 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23650 __ psllw($vtmp$$XMMRegister, 2);
23651
23652 // Duplicate again to create 4 copies of byte index
23653 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23654 __ psllw($dst$$XMMRegister, 8);
23655 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23656
23657 // Add 3,2,1,0 to get alternate byte index
23658 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23659 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23660 %}
23661 ins_pipe( pipe_slow );
23662 %}
23663
23664 instruct rearrangeI(vec dst, vec shuffle) %{
23665 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23666 UseAVX == 0);
23667 match(Set dst (VectorRearrange dst shuffle));
23668 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23669 ins_encode %{
23670 assert(UseSSE >= 4, "required");
23671 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23672 %}
23673 ins_pipe( pipe_slow );
23674 %}
23675
23676 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23677 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23678 UseAVX > 0);
23679 match(Set dst (VectorRearrange src shuffle));
23680 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23681 ins_encode %{
23682 int vlen_enc = vector_length_encoding(this);
23683 BasicType bt = Matcher::vector_element_basic_type(this);
23684 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23685 %}
23686 ins_pipe( pipe_slow );
23687 %}
23688
23689 // LoadShuffle/Rearrange for Long and Double
23690
23691 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23692 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23693 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23694 match(Set dst (VectorLoadShuffle src));
23695 effect(TEMP dst, TEMP vtmp);
23696 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23697 ins_encode %{
23698 assert(UseAVX >= 2, "required");
23699
23700 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
23703
23704 // Multiply each shuffle by two to get double word index
23705 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23706
23707 // Duplicate each double word shuffle
23708 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23709 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23710
23711 // Add one to get alternate double word index
23712 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23713 %}
23714 ins_pipe( pipe_slow );
23715 %}
23716
23717 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23718 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23719 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23720 match(Set dst (VectorRearrange src shuffle));
23721 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23722 ins_encode %{
23723 assert(UseAVX >= 2, "required");
23724
23725 int vlen_enc = vector_length_encoding(this);
23726 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23727 %}
23728 ins_pipe( pipe_slow );
23729 %}
23730
23731 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23732 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23733 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23734 match(Set dst (VectorRearrange src shuffle));
23735 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23736 ins_encode %{
23737 assert(UseAVX > 2, "required");
23738
23739 int vlen_enc = vector_length_encoding(this);
23740 if (vlen_enc == Assembler::AVX_128bit) {
23741 vlen_enc = Assembler::AVX_256bit;
23742 }
23743 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23744 %}
23745 ins_pipe( pipe_slow );
23746 %}
23747
23748 // --------------------------------- FMA --------------------------------------
23749 // a * b + c
23750
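// $c serves as both accumulator and result (c = a * b + c); the vfmaf/vfmad
// macros expand to an FMA3 form such as vfmadd231ps/vfmadd231pd.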
23751 instruct vfmaF_reg(vec a, vec b, vec c) %{
23752 match(Set c (FmaVF c (Binary a b)));
23753 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23754 ins_cost(150);
23755 ins_encode %{
23756 assert(UseFMA, "not enabled");
23757 int vlen_enc = vector_length_encoding(this);
23758 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23759 %}
23760 ins_pipe( pipe_slow );
23761 %}
23762
23763 instruct vfmaF_mem(vec a, memory b, vec c) %{
23764 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23765 match(Set c (FmaVF c (Binary a (LoadVector b))));
23766 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23767 ins_cost(150);
23768 ins_encode %{
23769 assert(UseFMA, "not enabled");
23770 int vlen_enc = vector_length_encoding(this);
23771 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23772 %}
23773 ins_pipe( pipe_slow );
23774 %}
23775
23776 instruct vfmaD_reg(vec a, vec b, vec c) %{
23777 match(Set c (FmaVD c (Binary a b)));
23778 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23779 ins_cost(150);
23780 ins_encode %{
23781 assert(UseFMA, "not enabled");
23782 int vlen_enc = vector_length_encoding(this);
23783 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23784 %}
23785 ins_pipe( pipe_slow );
23786 %}
23787
23788 instruct vfmaD_mem(vec a, memory b, vec c) %{
23789 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23790 match(Set c (FmaVD c (Binary a (LoadVector b))));
23791 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23792 ins_cost(150);
23793 ins_encode %{
23794 assert(UseFMA, "not enabled");
23795 int vlen_enc = vector_length_encoding(this);
23796 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23797 %}
23798 ins_pipe( pipe_slow );
23799 %}
23800
23801 // --------------------------------- Vector Multiply Add --------------------------------------
23802
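// pmaddwd multiplies adjacent pairs of 16-bit elements and sums each pair into
// a 32-bit result: dst[i] = src1[2i]*src2[2i] + src1[2i+1]*src2[2i+1].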
23803 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23804 predicate(UseAVX == 0);
23805 match(Set dst (MulAddVS2VI dst src1));
23806 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23807 ins_encode %{
23808 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23809 %}
23810 ins_pipe( pipe_slow );
23811 %}
23812
23813 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23814 predicate(UseAVX > 0);
23815 match(Set dst (MulAddVS2VI src1 src2));
23816 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23817 ins_encode %{
23818 int vlen_enc = vector_length_encoding(this);
23819 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23820 %}
23821 ins_pipe( pipe_slow );
23822 %}
23823
23824 // --------------------------------- Vector Multiply Add Add ----------------------------------
23825
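// With AVX-512 VNNI, a MulAddVS2VI feeding an AddVI accumulator fuses into a
// single evpdpwssd (dst += pairwise products); the low ins_cost makes the
// matcher prefer this rule over the two-instruction sequence.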
23826 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23827 predicate(VM_Version::supports_avx512_vnni());
23828 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23829 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23830 ins_encode %{
23831 assert(UseAVX > 2, "required");
23832 int vlen_enc = vector_length_encoding(this);
23833 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23834 %}
23835 ins_pipe( pipe_slow );
23836 ins_cost(10);
23837 %}
23838
23839 // --------------------------------- PopCount --------------------------------------
23840
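// When the target has native vector popcount for the element width (the
// is_vector_popcount_predicate check), the EVEX instruction is used directly;
// otherwise the macro assembler falls back to a table-lookup sequence, which
// needs the extra vector and GPR temps.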
23841 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23842 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23843 match(Set dst (PopCountVI src));
23844 match(Set dst (PopCountVL src));
23845 format %{ "vector_popcount_integral $dst, $src" %}
23846 ins_encode %{
23847 int opcode = this->ideal_Opcode();
23848 int vlen_enc = vector_length_encoding(this, $src);
23849 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23850 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23851 %}
23852 ins_pipe( pipe_slow );
23853 %}
23854
23855 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23856 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23857 match(Set dst (PopCountVI src mask));
23858 match(Set dst (PopCountVL src mask));
23859 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23860 ins_encode %{
23861 int vlen_enc = vector_length_encoding(this, $src);
23862 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23863 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23864 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23865 %}
23866 ins_pipe( pipe_slow );
23867 %}
23868
23869 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23870 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23871 match(Set dst (PopCountVI src));
23872 match(Set dst (PopCountVL src));
23873 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23874 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23875 ins_encode %{
23877 int vlen_enc = vector_length_encoding(this, $src);
23878 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23879 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23880 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23881 %}
23882 ins_pipe( pipe_slow );
23883 %}
23884
23885 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
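// There is no per-lane trailing-zero-count instruction; the macro assembler
// isolates the lowest set bit of each lane and reuses the leading-zero-count
// sequences.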
23886
23887 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23888 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23889 Matcher::vector_length_in_bytes(n->in(1))));
23890 match(Set dst (CountTrailingZerosV src));
23891 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23892 ins_cost(400);
23893 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23894 ins_encode %{
23895 int vlen_enc = vector_length_encoding(this, $src);
23896 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23897 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23898 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23899 %}
23900 ins_pipe( pipe_slow );
23901 %}
23902
23903 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23904 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23905 VM_Version::supports_avx512cd() &&
23906 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23907 match(Set dst (CountTrailingZerosV src));
23908 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23909 ins_cost(400);
23910 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23911 ins_encode %{
23912 int vlen_enc = vector_length_encoding(this, $src);
23913 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23914 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23915 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23916 %}
23917 ins_pipe( pipe_slow );
23918 %}
23919
23920 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23921 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23922 match(Set dst (CountTrailingZerosV src));
23923 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23924 ins_cost(400);
23925 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23926 ins_encode %{
23927 int vlen_enc = vector_length_encoding(this, $src);
23928 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23929 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23930 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23931 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23932 %}
23933 ins_pipe( pipe_slow );
23934 %}
23935
23936 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23937 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23938 match(Set dst (CountTrailingZerosV src));
23939 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23940 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23941 ins_encode %{
23942 int vlen_enc = vector_length_encoding(this, $src);
23943 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23944 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23945 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23946 %}
23947 ins_pipe( pipe_slow );
23948 %}
23949
23950
23951 // --------------------------------- Bitwise Ternary Logic ----------------------------------
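// vpternlogd evaluates an arbitrary three-input boolean function: for each bit
// position the result is $func[(dst_bit << 2) | (src2_bit << 1) | src3_bit].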
23952
23953 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23954 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23955 effect(TEMP dst);
23956 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23957 ins_encode %{
23958 int vector_len = vector_length_encoding(this);
23959 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23960 %}
23961 ins_pipe( pipe_slow );
23962 %}
23963
23964 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23965 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23966 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23967 effect(TEMP dst);
23968 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23969 ins_encode %{
23970 int vector_len = vector_length_encoding(this);
23971 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23972 %}
23973 ins_pipe( pipe_slow );
23974 %}
23975
23976 // --------------------------------- Rotation Operations ----------------------------------
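// Rotates map onto the EVEX rotate instructions (vprold/vprolvd and friends)
// where available; otherwise the macro assembler synthesizes them from a pair
// of shifts and an OR.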
23977 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23978 match(Set dst (RotateLeftV src shift));
23979 match(Set dst (RotateRightV src shift));
23980 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23981 ins_encode %{
23982 int opcode = this->ideal_Opcode();
23983 int vector_len = vector_length_encoding(this);
23984 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23985 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23986 %}
23987 ins_pipe( pipe_slow );
23988 %}
23989
23990 instruct vprotate_var(vec dst, vec src, vec shift) %{
23991 match(Set dst (RotateLeftV src shift));
23992 match(Set dst (RotateRightV src shift));
23993 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23994 ins_encode %{
23995 int opcode = this->ideal_Opcode();
23996 int vector_len = vector_length_encoding(this);
23997 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23998 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23999 %}
24000 ins_pipe( pipe_slow );
24001 %}
24002
24003 // ---------------------------------- Masked Operations ------------------------------------
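// Masked loads and stores come in two flavors: AVX keeps the mask in a vector
// register (vmaskmov-style), EVEX uses an opmask (k) register.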
24004 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24005 predicate(!n->in(3)->bottom_type()->isa_vectmask());
24006 match(Set dst (LoadVectorMasked mem mask));
24007 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
24008 ins_encode %{
24009 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24010 int vlen_enc = vector_length_encoding(this);
24011 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24012 %}
24013 ins_pipe( pipe_slow );
24014 %}
24015
24016
24017 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24018 predicate(n->in(3)->bottom_type()->isa_vectmask());
24019 match(Set dst (LoadVectorMasked mem mask));
24020 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
24021 ins_encode %{
24022 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24023 int vector_len = vector_length_encoding(this);
24024 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24025 %}
24026 ins_pipe( pipe_slow );
24027 %}
24028
24029 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24030 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24031 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24032 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24033 ins_encode %{
24034 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24035 int vlen_enc = vector_length_encoding(src_node);
24036 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24037 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24038 %}
24039 ins_pipe( pipe_slow );
24040 %}
24041
24042 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24043 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24044 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24045 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24046 ins_encode %{
24047 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24048 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24049 int vlen_enc = vector_length_encoding(src_node);
24050 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24051 %}
24052 ins_pipe( pipe_slow );
24053 %}
24054
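// Runtime check that a vector memory access is aligned: stops the VM if any of
// the low address bits selected by $mask are set (used when vector alignment
// verification is enabled).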
24055 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24056 match(Set addr (VerifyVectorAlignment addr mask));
24057 effect(KILL cr);
24058 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24059 ins_encode %{
24060 Label Lskip;
24061 // check if masked bits of addr are zero
24062 __ testq($addr$$Register, $mask$$constant);
24063 __ jccb(Assembler::equal, Lskip);
24064 __ stop("verify_vector_alignment found a misaligned vector memory access");
24065 __ bind(Lskip);
24066 %}
24067 ins_pipe(pipe_slow);
24068 %}
24069
24070 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24071 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24072 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24073 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24074 ins_encode %{
24075 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24076 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24077
24078 Label DONE;
24079 int vlen_enc = vector_length_encoding(this, $src1);
24080 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24081
24082 __ knotql($ktmp2$$KRegister, $mask$$KRegister); // ktmp2 = lanes excluded by the mask
24083 __ mov64($dst$$Register, -1L); // assume every active lane compares equal
24084 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24085 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); // CF is set iff (ktmp2 | ktmp1) is all ones,
24086 __ jccb(Assembler::carrySet, DONE); // i.e. every lane is either equal or masked off
24087 __ kmovql($dst$$Register, $ktmp1$$KRegister);
24088 __ notq($dst$$Register);
24089 __ tzcntq($dst$$Register, $dst$$Register); // index of the first mismatching active lane
24090 __ bind(DONE);
24091 %}
24092 ins_pipe( pipe_slow );
24093 %}
24094
24095
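// VectorMaskGen builds an opmask with the low $len bits set (lanes [0, $len) active).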
24096 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24097 match(Set dst (VectorMaskGen len));
24098 effect(TEMP temp, KILL cr);
24099 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24100 ins_encode %{
24101 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24102 %}
24103 ins_pipe( pipe_slow );
24104 %}
24105
24106 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24107 match(Set dst (VectorMaskGen len));
24108 format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
24109 effect(TEMP temp);
24110 ins_encode %{
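// Set the low $len bits by shifting all-ones right by (64 - $len), then
// transfer the result into the opmask register.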
24111 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
24112 __ kmovql($dst$$KRegister, $temp$$Register);
24113 %}
24114 ins_pipe( pipe_slow );
24115 %}
24116
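// VectorMaskToLong materializes a mask as a long, one bit per lane: a plain
// kmov for opmask registers, or a sign-bit extraction for boolean vectors.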
24117 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24118 predicate(n->in(1)->bottom_type()->isa_vectmask());
24119 match(Set dst (VectorMaskToLong mask));
24120 effect(TEMP dst, KILL cr);
24121 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24122 ins_encode %{
24123 int opcode = this->ideal_Opcode();
24124 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24125 int mask_len = Matcher::vector_length(this, $mask);
24126 int mask_size = mask_len * type2aelembytes(mbt);
24127 int vlen_enc = vector_length_encoding(this, $mask);
24128 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24129 $dst$$Register, mask_len, mask_size, vlen_enc);
24130 %}
24131 ins_pipe( pipe_slow );
24132 %}
24133
24134 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24135 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24136 match(Set dst (VectorMaskToLong mask));
24137 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24138 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24139 ins_encode %{
24140 int opcode = this->ideal_Opcode();
24141 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24142 int mask_len = Matcher::vector_length(this, $mask);
24143 int vlen_enc = vector_length_encoding(this, $mask);
24144 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24145 $dst$$Register, mask_len, mbt, vlen_enc);
24146 %}
24147 ins_pipe( pipe_slow );
24148 %}
24149
24150 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24151 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24152 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24153 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24154 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24155 ins_encode %{
24156 int opcode = this->ideal_Opcode();
24157 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24158 int mask_len = Matcher::vector_length(this, $mask);
24159 int vlen_enc = vector_length_encoding(this, $mask);
24160 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24161 $dst$$Register, mask_len, mbt, vlen_enc);
24162 %}
24163 ins_pipe( pipe_slow );
24164 %}
24165
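// VectorMaskTrueCount is a population count over the lanes of the mask.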
24166 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24167 predicate(n->in(1)->bottom_type()->isa_vectmask());
24168 match(Set dst (VectorMaskTrueCount mask));
24169 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24170 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24171 ins_encode %{
24172 int opcode = this->ideal_Opcode();
24173 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24174 int mask_len = Matcher::vector_length(this, $mask);
24175 int mask_size = mask_len * type2aelembytes(mbt);
24176 int vlen_enc = vector_length_encoding(this, $mask);
24177 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24178 $tmp$$Register, mask_len, mask_size, vlen_enc);
24179 %}
24180 ins_pipe( pipe_slow );
24181 %}
24182
24183 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24184 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24185 match(Set dst (VectorMaskTrueCount mask));
24186 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24187 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24188 ins_encode %{
24189 int opcode = this->ideal_Opcode();
24190 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24191 int mask_len = Matcher::vector_length(this, $mask);
24192 int vlen_enc = vector_length_encoding(this, $mask);
24193 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24194 $tmp$$Register, mask_len, mbt, vlen_enc);
24195 %}
24196 ins_pipe( pipe_slow );
24197 %}
24198
24199 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24200 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24201 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24202 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24203 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24204 ins_encode %{
24205 int opcode = this->ideal_Opcode();
24206 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24207 int mask_len = Matcher::vector_length(this, $mask);
24208 int vlen_enc = vector_length_encoding(this, $mask);
24209 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24210 $tmp$$Register, mask_len, mbt, vlen_enc);
24211 %}
24212 ins_pipe( pipe_slow );
24213 %}
24214
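// VectorMaskFirstTrue/LastTrue yield the index of the first/last set lane;
// vector_mask_operation selects the sequence from the ideal opcode.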
24215 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24216 predicate(n->in(1)->bottom_type()->isa_vectmask());
24217 match(Set dst (VectorMaskFirstTrue mask));
24218 match(Set dst (VectorMaskLastTrue mask));
24219 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24220 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24221 ins_encode %{
24222 int opcode = this->ideal_Opcode();
24223 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24224 int mask_len = Matcher::vector_length(this, $mask);
24225 int mask_size = mask_len * type2aelembytes(mbt);
24226 int vlen_enc = vector_length_encoding(this, $mask);
24227 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24228 $tmp$$Register, mask_len, mask_size, vlen_enc);
24229 %}
24230 ins_pipe( pipe_slow );
24231 %}
24232
24233 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24234 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24235 match(Set dst (VectorMaskFirstTrue mask));
24236 match(Set dst (VectorMaskLastTrue mask));
24237 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24238 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24239 ins_encode %{
24240 int opcode = this->ideal_Opcode();
24241 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24242 int mask_len = Matcher::vector_length(this, $mask);
24243 int vlen_enc = vector_length_encoding(this, $mask);
24244 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24245 $tmp$$Register, mask_len, mbt, vlen_enc);
24246 %}
24247 ins_pipe( pipe_slow );
24248 %}
24249
24250 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24251 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24252 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24253 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24254 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24255 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24256 ins_encode %{
24257 int opcode = this->ideal_Opcode();
24258 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24259 int mask_len = Matcher::vector_length(this, $mask);
24260 int vlen_enc = vector_length_encoding(this, $mask);
24261 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24262 $tmp$$Register, mask_len, mbt, vlen_enc);
24263 %}
24264 ins_pipe( pipe_slow );
24265 %}
24266
24267 // --------------------------------- Compress/Expand Operations ---------------------------
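// EVEX targets use vpcompress/vpexpand directly; on AVX2 the operation is
// emulated with a permutation derived from the mask, hence the extra temps.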
24268 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24269 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24270 match(Set dst (CompressV src mask));
24271 match(Set dst (ExpandV src mask));
24272 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24273 format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24274 ins_encode %{
24275 int opcode = this->ideal_Opcode();
24276 int vlen_enc = vector_length_encoding(this);
24277 BasicType bt = Matcher::vector_element_basic_type(this);
24278 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24279 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24280 %}
24281 ins_pipe( pipe_slow );
24282 %}
24283
24284 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24285 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24286 match(Set dst (CompressV src mask));
24287 match(Set dst (ExpandV src mask));
24288 format %{ "vector_compress_expand $dst, $src, $mask" %}
24289 ins_encode %{
24290 int opcode = this->ideal_Opcode();
24291 int vector_len = vector_length_encoding(this);
24292 BasicType bt = Matcher::vector_element_basic_type(this);
24293 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24294 %}
24295 ins_pipe( pipe_slow );
24296 %}
24297
24298 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24299 match(Set dst (CompressM mask));
24300 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24301 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24302 ins_encode %{
24303 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24304 int mask_len = Matcher::vector_length(this);
24305 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24306 %}
24307 ins_pipe( pipe_slow );
24308 %}
24309
24310 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
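// ReverseV reverses the bits within each lane and ReverseBytesV the bytes.
// With GFNI the per-byte bit reversal is a single vgf2p8affineqb.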
24311
24312 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24313 predicate(!VM_Version::supports_gfni());
24314 match(Set dst (ReverseV src));
24315 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24316 format %{ "vector_reverse_bit $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24317 ins_encode %{
24318 int vec_enc = vector_length_encoding(this);
24319 BasicType bt = Matcher::vector_element_basic_type(this);
24320 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24321 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24322 %}
24323 ins_pipe( pipe_slow );
24324 %}
24325
24326 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24327 predicate(VM_Version::supports_gfni());
24328 match(Set dst (ReverseV src));
24329 effect(TEMP dst, TEMP xtmp);
24330 format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24331 ins_encode %{
24332 int vec_enc = vector_length_encoding(this);
24333 BasicType bt = Matcher::vector_element_basic_type(this);
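// 0x8040201008040201 is the 8x8 bit matrix (loaded from the constant table)
// that makes vgf2p8affineqb reverse the bit order of every byte.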
24334 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24335 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24336 $xtmp$$XMMRegister);
24337 %}
24338 ins_pipe( pipe_slow );
24339 %}
24340
24341 instruct vreverse_byte_reg(vec dst, vec src) %{
24342 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24343 match(Set dst (ReverseBytesV src));
24344 effect(TEMP dst);
24345 format %{ "vector_reverse_byte $dst, $src" %}
24346 ins_encode %{
24347 int vec_enc = vector_length_encoding(this);
24348 BasicType bt = Matcher::vector_element_basic_type(this);
24349 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24350 %}
24351 ins_pipe( pipe_slow );
24352 %}
24353
24354 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24355 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24356 match(Set dst (ReverseBytesV src));
24357 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24358 format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24359 ins_encode %{
24360 int vec_enc = vector_length_encoding(this);
24361 BasicType bt = Matcher::vector_element_basic_type(this);
24362 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24363 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24364 %}
24365 ins_pipe( pipe_slow );
24366 %}
24367
24368 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
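// Int/long lanes can use vplzcntd/vplzcntq (AVX512CD) directly; sub-word types
// and pre-AVX512 targets are synthesized in the macro assembler.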
24369
24370 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24371 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24372 Matcher::vector_length_in_bytes(n->in(1))));
24373 match(Set dst (CountLeadingZerosV src));
24374 format %{ "vector_count_leading_zeros $dst, $src" %}
24375 ins_encode %{
24376 int vlen_enc = vector_length_encoding(this, $src);
24377 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24378 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24379 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24380 %}
24381 ins_pipe( pipe_slow );
24382 %}
24383
24384 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24385 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24386 Matcher::vector_length_in_bytes(n->in(1))));
24387 match(Set dst (CountLeadingZerosV src mask));
24388 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24389 ins_encode %{
24390 int vlen_enc = vector_length_encoding(this, $src);
24391 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24392 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24393 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24394 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24395 %}
24396 ins_pipe( pipe_slow );
24397 %}
24398
24399 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24400 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24401 VM_Version::supports_avx512cd() &&
24402 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24403 match(Set dst (CountLeadingZerosV src));
24404 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24405 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24406 ins_encode %{
24407 int vlen_enc = vector_length_encoding(this, $src);
24408 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24409 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24410 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24411 %}
24412 ins_pipe( pipe_slow );
24413 %}
24414
24415 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24416 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24417 match(Set dst (CountLeadingZerosV src));
24418 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24419 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24420 ins_encode %{
24421 int vlen_enc = vector_length_encoding(this, $src);
24422 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24423 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24424 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24425 $rtmp$$Register, true, vlen_enc);
24426 %}
24427 ins_pipe( pipe_slow );
24428 %}
24429
24430 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24431 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24432 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24433 match(Set dst (CountLeadingZerosV src));
24434 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24435 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24436 ins_encode %{
24437 int vlen_enc = vector_length_encoding(this, $src);
24438 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24439 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24440 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24441 %}
24442 ins_pipe( pipe_slow );
24443 %}
24444
24445 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24446 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24447 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24448 match(Set dst (CountLeadingZerosV src));
24449 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24450 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24451 ins_encode %{
24452 int vlen_enc = vector_length_encoding(this, $src);
24453 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24454 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24455 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24456 %}
24457 ins_pipe( pipe_slow );
24458 %}
24459
24460 // ---------------------------------- Vector Masked Operations ------------------------------------
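// Predicated vector arithmetic: the mask is the last input of each ideal node.
// evmasked_op picks the EVEX encoding from the ideal opcode; its boolean
// argument selects merge (true) versus zero (false) masking.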
24461
24462 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24463 match(Set dst (AddVB (Binary dst src2) mask));
24464 match(Set dst (AddVS (Binary dst src2) mask));
24465 match(Set dst (AddVI (Binary dst src2) mask));
24466 match(Set dst (AddVL (Binary dst src2) mask));
24467 match(Set dst (AddVF (Binary dst src2) mask));
24468 match(Set dst (AddVD (Binary dst src2) mask));
24469 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24470 ins_encode %{
24471 int vlen_enc = vector_length_encoding(this);
24472 BasicType bt = Matcher::vector_element_basic_type(this);
24473 int opc = this->ideal_Opcode();
24474 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24475 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24476 %}
24477 ins_pipe( pipe_slow );
24478 %}
24479
24480 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24481 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24482 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24483 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24484 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24485 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24486 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24487 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24488 ins_encode %{
24489 int vlen_enc = vector_length_encoding(this);
24490 BasicType bt = Matcher::vector_element_basic_type(this);
24491 int opc = this->ideal_Opcode();
24492 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24493 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24494 %}
24495 ins_pipe( pipe_slow );
24496 %}
24497
24498 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24499 match(Set dst (XorV (Binary dst src2) mask));
24500 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24501 ins_encode %{
24502 int vlen_enc = vector_length_encoding(this);
24503 BasicType bt = Matcher::vector_element_basic_type(this);
24504 int opc = this->ideal_Opcode();
24505 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24506 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24507 %}
24508 ins_pipe( pipe_slow );
24509 %}
24510
24511 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24512 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24513 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24514 ins_encode %{
24515 int vlen_enc = vector_length_encoding(this);
24516 BasicType bt = Matcher::vector_element_basic_type(this);
24517 int opc = this->ideal_Opcode();
24518 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24519 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24520 %}
24521 ins_pipe( pipe_slow );
24522 %}
24523
24524 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24525 match(Set dst (OrV (Binary dst src2) mask));
24526 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24527 ins_encode %{
24528 int vlen_enc = vector_length_encoding(this);
24529 BasicType bt = Matcher::vector_element_basic_type(this);
24530 int opc = this->ideal_Opcode();
24531 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24532 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24533 %}
24534 ins_pipe( pipe_slow );
24535 %}
24536
24537 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24538 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24539 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24540 ins_encode %{
24541 int vlen_enc = vector_length_encoding(this);
24542 BasicType bt = Matcher::vector_element_basic_type(this);
24543 int opc = this->ideal_Opcode();
24544 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24545 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24546 %}
24547 ins_pipe( pipe_slow );
24548 %}
24549
24550 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24551 match(Set dst (AndV (Binary dst src2) mask));
24552 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24553 ins_encode %{
24554 int vlen_enc = vector_length_encoding(this);
24555 BasicType bt = Matcher::vector_element_basic_type(this);
24556 int opc = this->ideal_Opcode();
24557 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24558 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24559 %}
24560 ins_pipe( pipe_slow );
24561 %}
24562
24563 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24564 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24565 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24566 ins_encode %{
24567 int vlen_enc = vector_length_encoding(this);
24568 BasicType bt = Matcher::vector_element_basic_type(this);
24569 int opc = this->ideal_Opcode();
24570 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24571 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24572 %}
24573 ins_pipe( pipe_slow );
24574 %}
24575
24576 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24577 match(Set dst (SubVB (Binary dst src2) mask));
24578 match(Set dst (SubVS (Binary dst src2) mask));
24579 match(Set dst (SubVI (Binary dst src2) mask));
24580 match(Set dst (SubVL (Binary dst src2) mask));
24581 match(Set dst (SubVF (Binary dst src2) mask));
24582 match(Set dst (SubVD (Binary dst src2) mask));
24583 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24584 ins_encode %{
24585 int vlen_enc = vector_length_encoding(this);
24586 BasicType bt = Matcher::vector_element_basic_type(this);
24587 int opc = this->ideal_Opcode();
24588 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24589 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24590 %}
24591 ins_pipe( pipe_slow );
24592 %}
24593
24594 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24595 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24596 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24597 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24598 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24599 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24600 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24601 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24602 ins_encode %{
24603 int vlen_enc = vector_length_encoding(this);
24604 BasicType bt = Matcher::vector_element_basic_type(this);
24605 int opc = this->ideal_Opcode();
24606 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24607 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24608 %}
24609 ins_pipe( pipe_slow );
24610 %}
24611
24612 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24613 match(Set dst (MulVS (Binary dst src2) mask));
24614 match(Set dst (MulVI (Binary dst src2) mask));
24615 match(Set dst (MulVL (Binary dst src2) mask));
24616 match(Set dst (MulVF (Binary dst src2) mask));
24617 match(Set dst (MulVD (Binary dst src2) mask));
24618 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24619 ins_encode %{
24620 int vlen_enc = vector_length_encoding(this);
24621 BasicType bt = Matcher::vector_element_basic_type(this);
24622 int opc = this->ideal_Opcode();
24623 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24624 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24625 %}
24626 ins_pipe( pipe_slow );
24627 %}
24628
24629 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24630 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24631 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24632 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24633 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24634 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24635 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24636 ins_encode %{
24637 int vlen_enc = vector_length_encoding(this);
24638 BasicType bt = Matcher::vector_element_basic_type(this);
24639 int opc = this->ideal_Opcode();
24640 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24641 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24642 %}
24643 ins_pipe( pipe_slow );
24644 %}
24645
24646 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24647 match(Set dst (SqrtVF dst mask));
24648 match(Set dst (SqrtVD dst mask));
24649 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24650 ins_encode %{
24651 int vlen_enc = vector_length_encoding(this);
24652 BasicType bt = Matcher::vector_element_basic_type(this);
24653 int opc = this->ideal_Opcode();
24654 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24655 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24656 %}
24657 ins_pipe( pipe_slow );
24658 %}
24659
24660 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24661 match(Set dst (DivVF (Binary dst src2) mask));
24662 match(Set dst (DivVD (Binary dst src2) mask));
24663 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24664 ins_encode %{
24665 int vlen_enc = vector_length_encoding(this);
24666 BasicType bt = Matcher::vector_element_basic_type(this);
24667 int opc = this->ideal_Opcode();
24668 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24669 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24670 %}
24671 ins_pipe( pipe_slow );
24672 %}
24673
24674 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24675 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24676 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24677 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24678 ins_encode %{
24679 int vlen_enc = vector_length_encoding(this);
24680 BasicType bt = Matcher::vector_element_basic_type(this);
24681 int opc = this->ideal_Opcode();
24682 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24683 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24684 %}
24685 ins_pipe( pipe_slow );
24686 %}
24687
24688
24689 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24690 match(Set dst (RotateLeftV (Binary dst shift) mask));
24691 match(Set dst (RotateRightV (Binary dst shift) mask));
24692 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24693 ins_encode %{
24694 int vlen_enc = vector_length_encoding(this);
24695 BasicType bt = Matcher::vector_element_basic_type(this);
24696 int opc = this->ideal_Opcode();
24697 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24698 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24699 %}
24700 ins_pipe( pipe_slow );
24701 %}
24702
24703 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24704 match(Set dst (RotateLeftV (Binary dst src2) mask));
24705 match(Set dst (RotateRightV (Binary dst src2) mask));
24706 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24707 ins_encode %{
24708 int vlen_enc = vector_length_encoding(this);
24709 BasicType bt = Matcher::vector_element_basic_type(this);
24710 int opc = this->ideal_Opcode();
24711 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24712 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24713 %}
24714 ins_pipe( pipe_slow );
24715 %}
24716
24717 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24718 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24719 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24720 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24721 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24722 ins_encode %{
24723 int vlen_enc = vector_length_encoding(this);
24724 BasicType bt = Matcher::vector_element_basic_type(this);
24725 int opc = this->ideal_Opcode();
24726 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24727 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24728 %}
24729 ins_pipe( pipe_slow );
24730 %}
24731
24732 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24733 predicate(!n->as_ShiftV()->is_var_shift());
24734 match(Set dst (LShiftVS (Binary dst src2) mask));
24735 match(Set dst (LShiftVI (Binary dst src2) mask));
24736 match(Set dst (LShiftVL (Binary dst src2) mask));
24737 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24738 ins_encode %{
24739 int vlen_enc = vector_length_encoding(this);
24740 BasicType bt = Matcher::vector_element_basic_type(this);
24741 int opc = this->ideal_Opcode();
24742 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24743 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24744 %}
24745 ins_pipe( pipe_slow );
24746 %}
24747
24748 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24749 predicate(n->as_ShiftV()->is_var_shift());
24750 match(Set dst (LShiftVS (Binary dst src2) mask));
24751 match(Set dst (LShiftVI (Binary dst src2) mask));
24752 match(Set dst (LShiftVL (Binary dst src2) mask));
24753 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24754 ins_encode %{
24755 int vlen_enc = vector_length_encoding(this);
24756 BasicType bt = Matcher::vector_element_basic_type(this);
24757 int opc = this->ideal_Opcode();
24758 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24759 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24760 %}
24761 ins_pipe( pipe_slow );
24762 %}
24763
24764 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24765 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24766 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24767 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24768 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24769 ins_encode %{
24770 int vlen_enc = vector_length_encoding(this);
24771 BasicType bt = Matcher::vector_element_basic_type(this);
24772 int opc = this->ideal_Opcode();
24773 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24774 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24775 %}
24776 ins_pipe( pipe_slow );
24777 %}
24778
24779 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24780 predicate(!n->as_ShiftV()->is_var_shift());
24781 match(Set dst (RShiftVS (Binary dst src2) mask));
24782 match(Set dst (RShiftVI (Binary dst src2) mask));
24783 match(Set dst (RShiftVL (Binary dst src2) mask));
24784 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24785 ins_encode %{
24786 int vlen_enc = vector_length_encoding(this);
24787 BasicType bt = Matcher::vector_element_basic_type(this);
24788 int opc = this->ideal_Opcode();
24789 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24790 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24791 %}
24792 ins_pipe( pipe_slow );
24793 %}
24794
24795 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24796 predicate(n->as_ShiftV()->is_var_shift());
24797 match(Set dst (RShiftVS (Binary dst src2) mask));
24798 match(Set dst (RShiftVI (Binary dst src2) mask));
24799 match(Set dst (RShiftVL (Binary dst src2) mask));
24800 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24801 ins_encode %{
24802 int vlen_enc = vector_length_encoding(this);
24803 BasicType bt = Matcher::vector_element_basic_type(this);
24804 int opc = this->ideal_Opcode();
24805 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24806 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24807 %}
24808 ins_pipe( pipe_slow );
24809 %}
24810
24811 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24812 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24813 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24814 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24815 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24816 ins_encode %{
24817 int vlen_enc = vector_length_encoding(this);
24818 BasicType bt = Matcher::vector_element_basic_type(this);
24819 int opc = this->ideal_Opcode();
24820 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24821 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24822 %}
24823 ins_pipe( pipe_slow );
24824 %}
24825
24826 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24827 predicate(!n->as_ShiftV()->is_var_shift());
24828 match(Set dst (URShiftVS (Binary dst src2) mask));
24829 match(Set dst (URShiftVI (Binary dst src2) mask));
24830 match(Set dst (URShiftVL (Binary dst src2) mask));
24831 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24832 ins_encode %{
24833 int vlen_enc = vector_length_encoding(this);
24834 BasicType bt = Matcher::vector_element_basic_type(this);
24835 int opc = this->ideal_Opcode();
24836 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24837 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24838 %}
24839 ins_pipe( pipe_slow );
24840 %}
24841
24842 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24843 predicate(n->as_ShiftV()->is_var_shift());
24844 match(Set dst (URShiftVS (Binary dst src2) mask));
24845 match(Set dst (URShiftVI (Binary dst src2) mask));
24846 match(Set dst (URShiftVL (Binary dst src2) mask));
24847 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24848 ins_encode %{
24849 int vlen_enc = vector_length_encoding(this);
24850 BasicType bt = Matcher::vector_element_basic_type(this);
24851 int opc = this->ideal_Opcode();
24852 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24853 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24854 %}
24855 ins_pipe( pipe_slow );
24856 %}
24857
24858 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24859 match(Set dst (MaxV (Binary dst src2) mask));
24860 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24861 ins_encode %{
24862 int vlen_enc = vector_length_encoding(this);
24863 BasicType bt = Matcher::vector_element_basic_type(this);
24864 int opc = this->ideal_Opcode();
24865 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24866 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24867 %}
24868 ins_pipe( pipe_slow );
24869 %}
24870
24871 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24872 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24873 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24874 ins_encode %{
24875 int vlen_enc = vector_length_encoding(this);
24876 BasicType bt = Matcher::vector_element_basic_type(this);
24877 int opc = this->ideal_Opcode();
24878 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24879 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24880 %}
24881 ins_pipe( pipe_slow );
24882 %}
24883
24884 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24885 match(Set dst (MinV (Binary dst src2) mask));
24886 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24887 ins_encode %{
24888 int vlen_enc = vector_length_encoding(this);
24889 BasicType bt = Matcher::vector_element_basic_type(this);
24890 int opc = this->ideal_Opcode();
24891 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24892 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24893 %}
24894 ins_pipe( pipe_slow );
24895 %}
24896
24897 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24898 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24899 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24900 ins_encode %{
24901 int vlen_enc = vector_length_encoding(this);
24902 BasicType bt = Matcher::vector_element_basic_type(this);
24903 int opc = this->ideal_Opcode();
24904 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24905 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24906 %}
24907 ins_pipe( pipe_slow );
24908 %}
24909
24910 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24911 match(Set dst (VectorRearrange (Binary dst src2) mask));
24912 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24913 ins_encode %{
24914 int vlen_enc = vector_length_encoding(this);
24915 BasicType bt = Matcher::vector_element_basic_type(this);
24916 int opc = this->ideal_Opcode();
24917 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24918 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24919 %}
24920 ins_pipe( pipe_slow );
24921 %}
24922
24923 instruct vabs_masked(vec dst, kReg mask) %{
24924 match(Set dst (AbsVB dst mask));
24925 match(Set dst (AbsVS dst mask));
24926 match(Set dst (AbsVI dst mask));
24927 match(Set dst (AbsVL dst mask));
24928 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24929 ins_encode %{
24930 int vlen_enc = vector_length_encoding(this);
24931 BasicType bt = Matcher::vector_element_basic_type(this);
24932 int opc = this->ideal_Opcode();
24933 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24934 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24935 %}
24936 ins_pipe( pipe_slow );
24937 %}
24938
24939 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24940 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24941 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24942 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24943 ins_encode %{
24944 assert(UseFMA, "not enabled");
24945 int vlen_enc = vector_length_encoding(this);
24946 BasicType bt = Matcher::vector_element_basic_type(this);
24947 int opc = this->ideal_Opcode();
24948 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24949 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24950 %}
24951 ins_pipe( pipe_slow );
24952 %}
24953
24954 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24955 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24956 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24957 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24958 ins_encode %{
24959 assert(UseFMA, "not enabled");
24960 int vlen_enc = vector_length_encoding(this);
24961 BasicType bt = Matcher::vector_element_basic_type(this);
24962 int opc = this->ideal_Opcode();
24963 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24964 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24965 %}
24966 ins_pipe( pipe_slow );
24967 %}
24968
24969 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24970 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24971 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24972 ins_encode %{
24973 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24974 int vlen_enc = vector_length_encoding(this, $src1);
24975 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24976
24977 // Dispatch on the element type of the sources; the integer cases also
24978 // derive the signedness of the comparison from the predicate.
24978 switch (src1_elem_bt) {
24979 case T_BYTE: {
24980 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24981 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24982 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24983 break;
24984 }
24985 case T_SHORT: {
24986 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24987 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24988 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24989 break;
24990 }
24991 case T_INT: {
24992 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24993 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24994 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24995 break;
24996 }
24997 case T_LONG: {
24998 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24999 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25000 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25001 break;
25002 }
25003 case T_FLOAT: {
25004 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25005 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25006 break;
25007 }
25008 case T_DOUBLE: {
25009 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25010 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25011 break;
25012 }
25013 default: assert(false, "%s", type2name(src1_elem_bt)); break;
25014 }
25015 %}
25016 ins_pipe( pipe_slow );
25017 %}
25018
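// MaskAll replicates a scalar boolean across all lanes of an opmask register.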
25019 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25020 predicate(Matcher::vector_length(n) <= 32);
25021 match(Set dst (MaskAll src));
25022 format %{ "mask_all_evexI_LE32 $dst, $src" %}
25023 ins_encode %{
25024 int mask_len = Matcher::vector_length(this);
25025 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25026 %}
25027 ins_pipe( pipe_slow );
25028 %}
25029
25030 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25031 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25032 match(Set dst (XorVMask src (MaskAll cnt)));
25033 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25034 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25035 ins_encode %{
25036 uint masklen = Matcher::vector_length(this);
25037 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25038 %}
25039 ins_pipe( pipe_slow );
25040 %}
25041
25042 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25043 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25044 (Matcher::vector_length(n) == 16) ||
25045 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25046 match(Set dst (XorVMask src (MaskAll cnt)));
25047 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25048 ins_encode %{
25049 uint masklen = Matcher::vector_length(this);
25050 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25051 %}
25052 ins_pipe( pipe_slow );
25053 %}
25054
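// When bottom_type()->isa_vectmask() is null the mask is materialized as a
// boolean vector in an XMM/YMM register rather than in an opmask (k) register.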
25055 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25056 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25057 match(Set dst (VectorLongToMask src));
25058 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25059 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25060 ins_encode %{
25061 int mask_len = Matcher::vector_length(this);
25062 int vec_enc = vector_length_encoding(mask_len);
25063 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25064 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25065 %}
25066 ins_pipe( pipe_slow );
25067 %}
25068
25069
25070 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25071 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25072 match(Set dst (VectorLongToMask src));
25073 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
25075 ins_encode %{
25076 int mask_len = Matcher::vector_length(this);
25077 assert(mask_len <= 32, "invalid mask length");
25078 int vec_enc = vector_length_encoding(mask_len);
25079 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25080 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25081 %}
25082 ins_pipe( pipe_slow );
25083 %}
25084
25085 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25086 predicate(n->bottom_type()->isa_vectmask());
25087 match(Set dst (VectorLongToMask src));
25088 format %{ "long_to_mask_evex $dst, $src\t!" %}
25089 ins_encode %{
25090 __ kmov($dst$$KRegister, $src$$Register);
25091 %}
25092 ins_pipe( pipe_slow );
25093 %}
25094
25095 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25096 match(Set dst (AndVMask src1 src2));
25097 match(Set dst (OrVMask src1 src2));
25098 match(Set dst (XorVMask src1 src2));
25099 effect(TEMP kscratch);
25100 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25101 ins_encode %{
25102 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25103 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25104 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25105 uint masklen = Matcher::vector_length(this);
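    // Byte-granularity k-register ops (kandb/korb/kxorb) require AVX512DQ; without
    // it, widen sub-word masks to 16 bits and use the word-sized forms instead.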
25106 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25107 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25108 %}
25109 ins_pipe( pipe_slow );
25110 %}
25111
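// The 8-bit func immediate is a vpternlog truth table selecting one of the 256
// possible three-input bitwise boolean functions.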
25112 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25113 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25114 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25115 ins_encode %{
25116 int vlen_enc = vector_length_encoding(this);
25117 BasicType bt = Matcher::vector_element_basic_type(this);
25118 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25119 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25120 %}
25121 ins_pipe( pipe_slow );
25122 %}
25123
25124 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25125 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25126 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25127 ins_encode %{
25128 int vlen_enc = vector_length_encoding(this);
25129 BasicType bt = Matcher::vector_element_basic_type(this);
25130 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25131 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25132 %}
25133 ins_pipe( pipe_slow );
25134 %}
25135
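// CastVV nodes only refine the type seen by the optimizer; they emit no machine
// code, hence the zero size and empty encoding.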
25136 instruct castMM(kReg dst)
25137 %{
25138 match(Set dst (CastVV dst));
25139
25140 size(0);
25141 format %{ "# castVV of $dst" %}
25142 ins_encode(/* empty encoding */);
25143 ins_cost(0);
25144 ins_pipe(empty);
25145 %}
25146
25147 instruct castVV(vec dst)
25148 %{
25149 match(Set dst (CastVV dst));
25150
25151 size(0);
25152 format %{ "# castVV of $dst" %}
25153 ins_encode(/* empty encoding */);
25154 ins_cost(0);
25155 ins_pipe(empty);
25156 %}
25157
25158 instruct castVVLeg(legVec dst)
25159 %{
25160 match(Set dst (CastVV dst));
25161
25162 size(0);
25163 format %{ "# castVV of $dst" %}
25164 ins_encode(/* empty encoding */);
25165 ins_cost(0);
25166 ins_pipe(empty);
25167 %}
25168
25169 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25170 %{
25171 match(Set dst (IsInfiniteF src));
25172 effect(TEMP ktmp, KILL cr);
25173 format %{ "float_class_check $dst, $src" %}
25174 ins_encode %{
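    // vfpclass imm8 0x18 selects the +infinity (bit 3) and -infinity (bit 4)
    // classes, so the mask bit is set iff the value is infinite.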
25175 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25176 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25177 %}
25178 ins_pipe(pipe_slow);
25179 %}
25180
25181 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25182 %{
25183 match(Set dst (IsInfiniteD src));
25184 effect(TEMP ktmp, KILL cr);
25185 format %{ "double_class_check $dst, $src" %}
25186 ins_encode %{
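    // imm8 0x18: test for +infinity (bit 3) or -infinity (bit 4), as above.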
25187 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25188 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25189 %}
25190 ins_pipe(pipe_slow);
25191 %}
25192
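// Byte/short saturating add/sub map directly to vpadds{b,w}/vpsubs{b,w} (signed)
// and vpaddus{b,w}/vpsubus{b,w} (unsigned); int/long lanes have no hardware
// saturating forms and are emulated in the _evex/_avx rules below.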
25193 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25194 %{
25195 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25196 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25197 match(Set dst (SaturatingAddV src1 src2));
25198 match(Set dst (SaturatingSubV src1 src2));
25199 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25200 ins_encode %{
25201 int vlen_enc = vector_length_encoding(this);
25202 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25203 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25204 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25205 %}
25206 ins_pipe(pipe_slow);
25207 %}
25208
25209 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25210 %{
25211 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25212 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25213 match(Set dst (SaturatingAddV src1 src2));
25214 match(Set dst (SaturatingSubV src1 src2));
25215 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25216 ins_encode %{
25217 int vlen_enc = vector_length_encoding(this);
25218 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25219 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25220 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25221 %}
25222 ins_pipe(pipe_slow);
25223 %}
25224
25225 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25226 %{
25227 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25228 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25229 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25230 match(Set dst (SaturatingAddV src1 src2));
25231 match(Set dst (SaturatingSubV src1 src2));
25232 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25233 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25234 ins_encode %{
25235 int vlen_enc = vector_length_encoding(this);
25236 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25237 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25238 $src1$$XMMRegister, $src2$$XMMRegister,
25239 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25240 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25241 %}
25242 ins_pipe(pipe_slow);
25243 %}
25244
25245 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25246 %{
25247 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25248 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25249 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25250 match(Set dst (SaturatingAddV src1 src2));
25251 match(Set dst (SaturatingSubV src1 src2));
25252 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25253 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25254 ins_encode %{
25255 int vlen_enc = vector_length_encoding(this);
25256 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25257 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25258 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25259 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25260 %}
25261 ins_pipe(pipe_slow);
25262 %}
25263
25264 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25265 %{
25266 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25267 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25268 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25269 match(Set dst (SaturatingAddV src1 src2));
25270 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25271 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25272 ins_encode %{
25273 int vlen_enc = vector_length_encoding(this);
25274 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25275 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25276 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25277 %}
25278 ins_pipe(pipe_slow);
25279 %}
25280
25281 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25282 %{
25283 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25284 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25285 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25286 match(Set dst (SaturatingAddV src1 src2));
25287 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25288 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25289 ins_encode %{
25290 int vlen_enc = vector_length_encoding(this);
25291 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25292 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25293 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25294 %}
25295 ins_pipe(pipe_slow);
25296 %}
25297
25298 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25299 %{
25300 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25301 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25302 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25303 match(Set dst (SaturatingSubV src1 src2));
25304 effect(TEMP ktmp);
25305 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25306 ins_encode %{
25307 int vlen_enc = vector_length_encoding(this);
25308 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25309 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25310 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25311 %}
25312 ins_pipe(pipe_slow);
25313 %}
25314
25315 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25316 %{
25317 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25318 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25319 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25320 match(Set dst (SaturatingSubV src1 src2));
25321 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25322 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25323 ins_encode %{
25324 int vlen_enc = vector_length_encoding(this);
25325 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25326 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25327 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25328 %}
25329 ins_pipe(pipe_slow);
25330 %}
25331
25332 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25333 %{
25334 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25335 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25336 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25337 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25338 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25339 ins_encode %{
25340 int vlen_enc = vector_length_encoding(this);
25341 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25342 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25343 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25344 %}
25345 ins_pipe(pipe_slow);
25346 %}
25347
25348 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25349 %{
25350 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25351 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25352 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25353 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25354 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25355 ins_encode %{
25356 int vlen_enc = vector_length_encoding(this);
25357 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25358 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25359 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25360 %}
25361 ins_pipe(pipe_slow);
25362 %}
25363
25364 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25365 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25366 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25367 match(Set dst (SaturatingAddV (Binary dst src) mask));
25368 match(Set dst (SaturatingSubV (Binary dst src) mask));
25369 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25370 ins_encode %{
25371 int vlen_enc = vector_length_encoding(this);
25372 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25373 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25374 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25375 %}
25376 ins_pipe( pipe_slow );
25377 %}
25378
25379 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25380 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25381 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25382 match(Set dst (SaturatingAddV (Binary dst src) mask));
25383 match(Set dst (SaturatingSubV (Binary dst src) mask));
25384 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25385 ins_encode %{
25386 int vlen_enc = vector_length_encoding(this);
25387 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25388 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25389 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25390 %}
25391 ins_pipe( pipe_slow );
25392 %}
25393
25394 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25395 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25396 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25397 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25398 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25399 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25400 ins_encode %{
25401 int vlen_enc = vector_length_encoding(this);
25402 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25403 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25404 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25405 %}
25406 ins_pipe( pipe_slow );
25407 %}
25408
25409 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25410 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25411 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25412 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25413 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25414 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25415 ins_encode %{
25416 int vlen_enc = vector_length_encoding(this);
25417 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25418 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25419 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25420 %}
25421 ins_pipe( pipe_slow );
25422 %}
25423
25424 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25425 %{
25426 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25427 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25428 ins_encode %{
25429 int vlen_enc = vector_length_encoding(this);
25430 BasicType bt = Matcher::vector_element_basic_type(this);
25431 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25432 %}
25433 ins_pipe(pipe_slow);
25434 %}
25435
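// ReinterpretS2HF/HF2S move the raw 16-bit half-float payload between a GPR and
// an XMM register via vmovw; no value conversion is performed.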
25436 instruct reinterpretS2HF(regF dst, rRegI src)
25437 %{
25438 match(Set dst (ReinterpretS2HF src));
25439 format %{ "vmovw $dst, $src" %}
25440 ins_encode %{
25441 __ vmovw($dst$$XMMRegister, $src$$Register);
25442 %}
25443 ins_pipe(pipe_slow);
25444 %}
25445
25446 instruct reinterpretHF2S(rRegI dst, regF src)
25447 %{
25448 match(Set dst (ReinterpretHF2S src));
25449 format %{ "vmovw $dst, $src" %}
25450 ins_encode %{
25451 __ vmovw($dst$$Register, $src$$XMMRegister);
25452 %}
25453 ins_pipe(pipe_slow);
25454 %}
25455
25456 instruct convF2HFAndS2HF(regF dst, regF src)
25457 %{
25458 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25459 format %{ "convF2HFAndS2HF $dst, $src" %}
25460 ins_encode %{
25461 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25462 %}
25463 ins_pipe(pipe_slow);
25464 %}
25465
25466 instruct convHF2SAndHF2F(regF dst, regF src)
25467 %{
25468 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25469 format %{ "convHF2SAndHF2F $dst, $src" %}
25470 ins_encode %{
25471 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25472 %}
25473 ins_pipe(pipe_slow);
25474 %}
25475
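// The scalar and packed FP16 arithmetic rules below rely on AVX512-FP16
// instructions.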
25476 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25477 %{
25478 match(Set dst (SqrtHF src));
25479 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25480 ins_encode %{
25481 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25482 %}
25483 ins_pipe(pipe_slow);
25484 %}
25485
25486 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25487 %{
25488 match(Set dst (AddHF src1 src2));
25489 match(Set dst (DivHF src1 src2));
25490 match(Set dst (MulHF src1 src2));
25491 match(Set dst (SubHF src1 src2));
25492 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25493 ins_encode %{
25494 int opcode = this->ideal_Opcode();
25495 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25496 %}
25497 ins_pipe(pipe_slow);
25498 %}
25499
25500 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25501 %{
25502 predicate(VM_Version::supports_avx10_2());
25503 match(Set dst (MaxHF src1 src2));
25504 match(Set dst (MinHF src1 src2));
25505 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25506 ins_encode %{
25507 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25508 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25509 %}
25510 ins_pipe( pipe_slow );
25511 %}
25512
25513 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25514 %{
25515 predicate(!VM_Version::supports_avx10_2());
25516 match(Set dst (MaxHF src1 src2));
25517 match(Set dst (MinHF src1 src2));
25518 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25519 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25520 ins_encode %{
25521 int opcode = this->ideal_Opcode();
25522 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25523 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25524 %}
25525 ins_pipe( pipe_slow );
25526 %}
25527
25528 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25529 %{
25530 match(Set dst (FmaHF src2 (Binary dst src1)));
25531 effect(DEF dst);
25532 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25533 ins_encode %{
25534 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25535 %}
25536 ins_pipe( pipe_slow );
25537 %}
25538
25539
25540 instruct vector_sqrt_HF_reg(vec dst, vec src)
25541 %{
25542 match(Set dst (SqrtVHF src));
25543 format %{ "vector_sqrt_fp16 $dst, $src" %}
25544 ins_encode %{
25545 int vlen_enc = vector_length_encoding(this);
25546 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25547 %}
25548 ins_pipe(pipe_slow);
25549 %}
25550
25551 instruct vector_sqrt_HF_mem(vec dst, memory src)
25552 %{
25553 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25554 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25555 ins_encode %{
25556 int vlen_enc = vector_length_encoding(this);
25557 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25558 %}
25559 ins_pipe(pipe_slow);
25560 %}
25561
25562 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25563 %{
25564 match(Set dst (AddVHF src1 src2));
25565 match(Set dst (DivVHF src1 src2));
25566 match(Set dst (MulVHF src1 src2));
25567 match(Set dst (SubVHF src1 src2));
25568 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25569 ins_encode %{
25570 int vlen_enc = vector_length_encoding(this);
25571 int opcode = this->ideal_Opcode();
25572 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25573 %}
25574 ins_pipe(pipe_slow);
25575 %}
25576
25577
25578 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25579 %{
25580 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25581 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25582 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25583 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25584 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25585 ins_encode %{
25586 int vlen_enc = vector_length_encoding(this);
25587 int opcode = this->ideal_Opcode();
25588 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25589 %}
25590 ins_pipe(pipe_slow);
25591 %}
25592
25593 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25594 %{
25595 match(Set dst (FmaVHF src2 (Binary dst src1)));
25596 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25597 ins_encode %{
25598 int vlen_enc = vector_length_encoding(this);
25599 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25600 %}
25601 ins_pipe( pipe_slow );
25602 %}
25603
25604 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25605 %{
25606 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25607 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25608 ins_encode %{
25609 int vlen_enc = vector_length_encoding(this);
25610 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25611 %}
25612 ins_pipe( pipe_slow );
25613 %}
25614
25615 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25616 %{
25617 predicate(VM_Version::supports_avx10_2());
25618 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25619 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25620 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25621 ins_encode %{
25622 int vlen_enc = vector_length_encoding(this);
25623 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25624 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25625 %}
25626 ins_pipe( pipe_slow );
25627 %}
25628
25629 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25630 %{
25631 predicate(VM_Version::supports_avx10_2());
25632 match(Set dst (MinVHF src1 src2));
25633 match(Set dst (MaxVHF src1 src2));
25634 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25635 ins_encode %{
25636 int vlen_enc = vector_length_encoding(this);
25637 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25638 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25639 %}
25640 ins_pipe( pipe_slow );
25641 %}
25642
25643 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25644 %{
25645 predicate(!VM_Version::supports_avx10_2());
25646 match(Set dst (MinVHF src1 src2));
25647 match(Set dst (MaxVHF src1 src2));
25648 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25649 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25650 ins_encode %{
25651 int vlen_enc = vector_length_encoding(this);
25652 int opcode = this->ideal_Opcode();
25653 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25654 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25655 %}
25656 ins_pipe( pipe_slow );
25657 %}
25658
25659 //----------PEEPHOLE RULES-----------------------------------------------------
25660 // These must follow all instruction definitions as they use the names
25661 // defined in the instructions definitions.
25662 //
25663 // peeppredicate ( rule_predicate );
// // the predicate; unless it holds, the peephole rule will be ignored
25665 //
25666 // peepmatch ( root_instr_name [preceding_instruction]* );
25667 //
25668 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, functions that when invoked return the new
// // nodes defined in peepreplace, and the rule numbers of the nodes appearing in
// // the corresponding peepmatch; the procedure returns true if successful, else
// // false
25677 //
25678 // peepconstraint %{
25679 // (instruction_number.operand_name relational_op instruction_number.operand_name
25680 // [, ...] );
25681 // // instruction numbers are zero-based using left to right order in peepmatch
25682 //
25683 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25684 // // provide an instruction_number.operand_name for each operand that appears
25685 // // in the replacement instruction's match rule
25686 //
25687 // ---------VM FLAGS---------------------------------------------------------
25688 //
25689 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25690 //
25691 // Each peephole rule is given an identifying number starting with zero and
25692 // increasing by one in the order seen by the parser. An individual peephole
25693 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25694 // on the command-line.
25695 //
25696 // ---------CURRENT LIMITATIONS----------------------------------------------
25697 //
25698 // Only transformations inside a basic block (do we need more for peephole)
25699 //
25700 // ---------EXAMPLE----------------------------------------------------------
25701 //
25702 // // pertinent parts of existing instructions in architecture description
25703 // instruct movI(rRegI dst, rRegI src)
25704 // %{
25705 // match(Set dst (CopyI src));
25706 // %}
25707 //
25708 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25709 // %{
25710 // match(Set dst (AddI dst src));
25711 // effect(KILL cr);
25712 // %}
25713 //
25714 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25715 // %{
25716 // match(Set dst (AddI dst src));
25717 // %}
25718 //
25719 // 1. Simple replacement
25720 // - Only match adjacent instructions in same basic block
25721 // - Only equality constraints
25722 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25723 // - Only one replacement instruction
25724 //
25725 // // Change (inc mov) to lea
25726 // peephole %{
25727 // // lea should only be emitted when beneficial
25728 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25729 // // increment preceded by register-register move
25730 // peepmatch ( incI_rReg movI );
25731 // // require that the destination register of the increment
25732 // // match the destination register of the move
25733 // peepconstraint ( 0.dst == 1.dst );
25734 // // construct a replacement instruction that sets
25735 // // the destination to ( move's source register + one )
25736 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25737 // %}
25738 //
25739 // 2. Procedural replacement
// - More flexible finding of relevant nodes
25741 // - More flexible constraints
25742 // - More flexible transformations
25743 // - May utilise architecture-dependent API more effectively
25744 // - Currently only one replacement instruction due to adlc parsing capabilities
25745 //
25746 // // Change (inc mov) to lea
25747 // peephole %{
25748 // // lea should only be emitted when beneficial
25749 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the matched nodes are passed into the function below
25751 // peepmatch ( incI_rReg movI );
25752 // // the method that takes the responsibility of transformation
25753 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that creates this node when
// // invoked is passed into the function above
25756 // peepreplace ( leaI_rReg_immI() );
25757 // %}
25758
// These instructions are not matched by the matcher but are used by the peephole rules
25760 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25761 %{
25762 predicate(false);
25763 match(Set dst (AddI src1 src2));
25764 format %{ "leal $dst, [$src1 + $src2]" %}
25765 ins_encode %{
25766 Register dst = $dst$$Register;
25767 Register src1 = $src1$$Register;
25768 Register src2 = $src2$$Register;
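    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte, so put the other register in the base position when possible.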
25769 if (src1 != rbp && src1 != r13) {
25770 __ leal(dst, Address(src1, src2, Address::times_1));
25771 } else {
25772 assert(src2 != rbp && src2 != r13, "");
25773 __ leal(dst, Address(src2, src1, Address::times_1));
25774 }
25775 %}
25776 ins_pipe(ialu_reg_reg);
25777 %}
25778
25779 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25780 %{
25781 predicate(false);
25782 match(Set dst (AddI src1 src2));
25783 format %{ "leal $dst, [$src1 + $src2]" %}
25784 ins_encode %{
25785 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25786 %}
25787 ins_pipe(ialu_reg_reg);
25788 %}
25789
25790 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25791 %{
25792 predicate(false);
25793 match(Set dst (LShiftI src shift));
25794 format %{ "leal $dst, [$src << $shift]" %}
25795 ins_encode %{
25796 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25797 Register src = $src$$Register;
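    // A shift by 1 (times_2) can instead be encoded as src + src with a plain
    // base+index address, avoiding the 32-bit displacement that a scaled index
    // without a base register would require.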
25798 if (scale == Address::times_2 && src != rbp && src != r13) {
25799 __ leal($dst$$Register, Address(src, src, Address::times_1));
25800 } else {
25801 __ leal($dst$$Register, Address(noreg, src, scale));
25802 }
25803 %}
25804 ins_pipe(ialu_reg_reg);
25805 %}
25806
25807 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25808 %{
25809 predicate(false);
25810 match(Set dst (AddL src1 src2));
25811 format %{ "leaq $dst, [$src1 + $src2]" %}
25812 ins_encode %{
25813 Register dst = $dst$$Register;
25814 Register src1 = $src1$$Register;
25815 Register src2 = $src2$$Register;
25816 if (src1 != rbp && src1 != r13) {
25817 __ leaq(dst, Address(src1, src2, Address::times_1));
25818 } else {
25819 assert(src2 != rbp && src2 != r13, "");
25820 __ leaq(dst, Address(src2, src1, Address::times_1));
25821 }
25822 %}
25823 ins_pipe(ialu_reg_reg);
25824 %}
25825
25826 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25827 %{
25828 predicate(false);
25829 match(Set dst (AddL src1 src2));
25830 format %{ "leaq $dst, [$src1 + $src2]" %}
25831 ins_encode %{
25832 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25833 %}
25834 ins_pipe(ialu_reg_reg);
25835 %}
25836
25837 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25838 %{
25839 predicate(false);
25840 match(Set dst (LShiftL src shift));
25841 format %{ "leaq $dst, [$src << $shift]" %}
25842 ins_encode %{
25843 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25844 Register src = $src$$Register;
25845 if (scale == Address::times_2 && src != rbp && src != r13) {
25846 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25847 } else {
25848 __ leaq($dst$$Register, Address(noreg, src, scale));
25849 }
25850 %}
25851 ins_pipe(ialu_reg_reg);
25852 %}
25853
25854 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25855 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25856 // processors with at least partial ALU support for lea
25857 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25858 // beneficial for processors with full ALU support
25859 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
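//
// For example (a sketch, registers chosen arbitrarily), the pair
//
//   movl rdx, rax
//   addl rdx, rbx
//
// coalesces into
//
//   leal rdx, [rax + rbx]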
25860
25861 peephole
25862 %{
25863 peeppredicate(VM_Version::supports_fast_2op_lea());
25864 peepmatch (addI_rReg);
25865 peepprocedure (lea_coalesce_reg);
25866 peepreplace (leaI_rReg_rReg_peep());
25867 %}
25868
25869 peephole
25870 %{
25871 peeppredicate(VM_Version::supports_fast_2op_lea());
25872 peepmatch (addI_rReg_imm);
25873 peepprocedure (lea_coalesce_imm);
25874 peepreplace (leaI_rReg_immI_peep());
25875 %}
25876
25877 peephole
25878 %{
25879 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25880 VM_Version::is_intel_cascade_lake());
25881 peepmatch (incI_rReg);
25882 peepprocedure (lea_coalesce_imm);
25883 peepreplace (leaI_rReg_immI_peep());
25884 %}
25885
25886 peephole
25887 %{
25888 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25889 VM_Version::is_intel_cascade_lake());
25890 peepmatch (decI_rReg);
25891 peepprocedure (lea_coalesce_imm);
25892 peepreplace (leaI_rReg_immI_peep());
25893 %}
25894
25895 peephole
25896 %{
25897 peeppredicate(VM_Version::supports_fast_2op_lea());
25898 peepmatch (salI_rReg_immI2);
25899 peepprocedure (lea_coalesce_imm);
25900 peepreplace (leaI_rReg_immI2_peep());
25901 %}
25902
25903 peephole
25904 %{
25905 peeppredicate(VM_Version::supports_fast_2op_lea());
25906 peepmatch (addL_rReg);
25907 peepprocedure (lea_coalesce_reg);
25908 peepreplace (leaL_rReg_rReg_peep());
25909 %}
25910
25911 peephole
25912 %{
25913 peeppredicate(VM_Version::supports_fast_2op_lea());
25914 peepmatch (addL_rReg_imm);
25915 peepprocedure (lea_coalesce_imm);
25916 peepreplace (leaL_rReg_immL32_peep());
25917 %}
25918
25919 peephole
25920 %{
25921 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25922 VM_Version::is_intel_cascade_lake());
25923 peepmatch (incL_rReg);
25924 peepprocedure (lea_coalesce_imm);
25925 peepreplace (leaL_rReg_immL32_peep());
25926 %}
25927
25928 peephole
25929 %{
25930 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25931 VM_Version::is_intel_cascade_lake());
25932 peepmatch (decL_rReg);
25933 peepprocedure (lea_coalesce_imm);
25934 peepreplace (leaL_rReg_immL32_peep());
25935 %}
25936
25937 peephole
25938 %{
25939 peeppredicate(VM_Version::supports_fast_2op_lea());
25940 peepmatch (salL_rReg_immI2);
25941 peepprocedure (lea_coalesce_imm);
25942 peepreplace (leaL_rReg_immI2_peep());
25943 %}
25944
25945 peephole
25946 %{
25947 peepmatch (leaPCompressedOopOffset);
25948 peepprocedure (lea_remove_redundant);
25949 %}
25950
25951 peephole
25952 %{
25953 peepmatch (leaP8Narrow);
25954 peepprocedure (lea_remove_redundant);
25955 %}
25956
25957 peephole
25958 %{
25959 peepmatch (leaP32Narrow);
25960 peepprocedure (lea_remove_redundant);
25961 %}
25962
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the preceding instruction.
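//
// For example (a sketch):
//
//   andl  rax, rbx    ; sets ZF and SF from the result
//   testl rax, rax    ; redundant, flags already reflect rax
//   je    done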
25965
// int variant
25967 peephole
25968 %{
25969 peepmatch (testI_reg);
25970 peepprocedure (test_may_remove);
25971 %}
25972
// long variant
25974 peephole
25975 %{
25976 peepmatch (testL_reg);
25977 peepprocedure (test_may_remove);
25978 %}
25979
25980
25981 //----------SMARTSPILL RULES---------------------------------------------------
25982 // These must follow all instruction definitions as they use the names
25983 // defined in the instructions definitions.