1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
// X86-64 (AMD64) Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
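//
// As a worked example, the first definition below,
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// reads: RAX is save-on-call under both the allocator's view and the C
// calling convention, is spilled/reloaded as an integer (LoadI/StoreI),
// has hardware encoding 0 in ModRM/REX fields, and is backed by the
// VMReg for rax (with RAX_H naming the upper half of the 64-bit pair).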
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
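// For example, "mov sil, al" encodes as 40 88 C6; without the REX prefix
// the same ModRM byte (88 C6) addresses DH instead of SIL.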
65
// RBX, RSI, and RDI were previously save-on-entry for Java code, then
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are SOE registers again
// (on Windows; see the #ifdef below).
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
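// chunk0 below roughly follows this heuristic: freely usable save-on-call
// scratch registers such as R10 and R11 come first, registers with fixed
// roles in calling sequences (e.g. RAX for return values) come later, and
// RSP comes last since it is permanently bound as the stack pointer.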
188
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-(p).
// Word a in each register holds a Float; words a-b hold a Double.
// The whole registers are used in SSE4.2 intrinsics, array copy stubs
// and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
// XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
// Linux ABI:   no XMM register is preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 might hold parameters.
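// Consequently a Float in XMM0 occupies only slot XMM0, a Double occupies
// slots XMM0 and XMM0b, and a full 512-bit vector occupies all sixteen
// slots XMM0 through XMM0p.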
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
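// K0 is not listed: the EVEX encoding reserves mask field value 000 (k0)
// to mean "no masking", so k0 cannot serve as an allocatable write mask.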
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
813 // Empty register class.
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
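// (R15 holds the current JavaThread in compiled Java code, which is why it
// gets a dedicated class here and is omitted from all_int_reg above.)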
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
// The flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
// Class for pre-EVEX float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
// Class for EVEX float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
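
// Note: a reg_class_dynamic resolves to its first underlying class when the
// trailing predicate holds at VM startup and to its second class otherwise;
// here that means XMM16-XMM31 are only allocatable on EVEX-capable CPUs.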
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for evex 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512bit vector registers (XMM0-XMM15 only)
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code that provides values, functions, and
// definitions necessary in the rest of the architecture description.
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() &&
         (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
}
1629
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper
}
1633
// !!!!! Special hack: every type of call must report the byte offset from the
// start of the call instruction to the point where the return address will
// point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1683 }
1684
1685 // This could be in MacroAssembler but it's fairly C2 specific
1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1687 Label exit;
1688 __ jccb(Assembler::noParity, exit);
1689 __ pushf();
1690 //
1691 // comiss/ucomiss instructions set ZF,PF,CF flags and
1692 // zero OF,AF,SF for NaN values.
1693 // Fixup flags by zeroing ZF,PF so that compare of NaN
1694 // values returns 'less than' result (CF is set).
1695 // Leave the rest of flags unchanged.
1696 //
1697 // 7 6 5 4 3 2 1 0
1698 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1699 // 0 0 1 0 1 0 1 1 (0x2B)
1700 //
1701 __ andq(Address(rsp, 0), 0xffffff2b);
1702 __ popf();
1703 __ bind(exit);
1704 }
1705
1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1707 Label done;
1708 __ movl(dst, -1);
1709 __ jcc(Assembler::parity, done);
1710 __ jcc(Assembler::below, done);
1711 __ setcc(Assembler::notEqual, dst);
1712 __ bind(done);
1713 }
1714
1715 // Math.min() # Math.max()
1716 // --------------------------
1717 // ucomis[s/d] #
1718 // ja -> b # a
1719 // jp -> NaN # NaN
1720 // jb -> a # b
1721 // je #
1722 // |-jz -> a | b # a & b
1723 // | -> a #
1724 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1725 XMMRegister a, XMMRegister b,
1726 XMMRegister xmmt, Register rt,
1727 bool min, bool single) {
1728
1729 Label nan, zero, below, above, done;
1730
1731 if (single)
1732 __ ucomiss(a, b);
1733 else
1734 __ ucomisd(a, b);
1735
1736 if (dst->encoding() != (min ? b : a)->encoding())
1737 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1738 else
1739 __ jccb(Assembler::above, done);
1740
1741 __ jccb(Assembler::parity, nan); // PF=1
1742 __ jccb(Assembler::below, below); // CF=1
1743
1744 // equal
1745 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1746 if (single) {
1747 __ ucomiss(a, xmmt);
1748 __ jccb(Assembler::equal, zero);
1749
1750 __ movflt(dst, a);
1751 __ jmp(done);
1752 }
1753 else {
1754 __ ucomisd(a, xmmt);
1755 __ jccb(Assembler::equal, zero);
1756
1757 __ movdbl(dst, a);
1758 __ jmp(done);
1759 }
1760
1761 __ bind(zero);
1762 if (min)
1763 __ vpor(dst, a, b, Assembler::AVX_128bit);
1764 else
1765 __ vpand(dst, a, b, Assembler::AVX_128bit);
1766
1767 __ jmp(done);
1768
1769 __ bind(above);
1770 if (single)
1771 __ movflt(dst, min ? b : a);
1772 else
1773 __ movdbl(dst, min ? b : a);
1774
1775 __ jmp(done);
1776
1777 __ bind(nan);
1778 if (single) {
1779 __ movl(rt, 0x7fc00000); // Float.NaN
1780 __ movdl(dst, rt);
1781 }
1782 else {
1783 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1784 __ movdq(dst, rt);
1785 }
1786 __ jmp(done);
1787
1788 __ bind(below);
1789 if (single)
1790 __ movflt(dst, min ? a : b);
1791 else
1792 __ movdbl(dst, min ? a : b);
1793
1794 __ bind(done);
1795 }
1796
1797 //=============================================================================
1798 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1799
1800 int ConstantTable::calculate_table_base_offset() const {
1801 return 0; // absolute addressing, no offset
1802 }
1803
1804 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1805 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1806 ShouldNotReachHere();
1807 }
1808
1809 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1810 // Empty encoding
1811 }
1812
1813 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1814 return 0;
1815 }
1816
1817 #ifndef PRODUCT
1818 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1819 st->print("# MachConstantBaseNode (empty encoding)");
1820 }
1821 #endif
1822
1823
1824 //=============================================================================
1825 #ifndef PRODUCT
1826 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1827 Compile* C = ra_->C;
1828
1829 int framesize = C->output()->frame_size_in_bytes();
1830 int bangsize = C->output()->bang_size_in_bytes();
1831 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1832 // Remove wordSize for return addr which is already pushed.
1833 framesize -= wordSize;
1834
1835 if (C->output()->need_stack_bang(bangsize)) {
1836 framesize -= wordSize;
1837 st->print("# stack bang (%d bytes)", bangsize);
1838 st->print("\n\t");
1839 st->print("pushq rbp\t# Save rbp");
1840 if (PreserveFramePointer) {
1841 st->print("\n\t");
1842 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1843 }
1844 if (framesize) {
1845 st->print("\n\t");
1846 st->print("subq rsp, #%d\t# Create frame",framesize);
1847 }
1848 } else {
1849 st->print("subq rsp, #%d\t# Create frame",framesize);
1850 st->print("\n\t");
1851 framesize -= wordSize;
1852 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1853 if (PreserveFramePointer) {
1854 st->print("\n\t");
1855 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1856 if (framesize > 0) {
1857 st->print("\n\t");
1858 st->print("addq rbp, #%d", framesize);
1859 }
1860 }
1861 }
1862
1863 if (VerifyStackAtCalls) {
1864 st->print("\n\t");
1865 framesize -= wordSize;
1866 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1867 #ifdef ASSERT
1868 st->print("\n\t");
1869 st->print("# stack alignment check");
1870 #endif
1871 }
1872 if (C->stub_function() != nullptr) {
1873 st->print("\n\t");
1874 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1875 st->print("\n\t");
1876 st->print("je fast_entry\t");
1877 st->print("\n\t");
1878 st->print("call #nmethod_entry_barrier_stub\t");
1879 st->print("\n\tfast_entry:");
1880 }
1881 st->cr();
1882 }
1883 #endif
1884
1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1886 Compile* C = ra_->C;
1887
1888 __ verified_entry(C);
1889
1890 if (ra_->C->stub_function() == nullptr) {
1891 __ entry_barrier();
1892 }
1893
1894 if (!Compile::current()->output()->in_scratch_emit_size()) {
1895 __ bind(*_verified_entry);
1896 }
1897
1898 C->output()->set_frame_complete(__ offset());
1899
1900 if (C->has_mach_constant_base_node()) {
// NOTE: We set the table base offset here because code that uses the
// constant table might be emitted before MachConstantBaseNode.
1903 ConstantTable& constant_table = C->output()->constant_table();
1904 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1905 }
1906 }
1907
1908
1909 int MachPrologNode::reloc() const
1910 {
1911 return 0; // a large enough number
1912 }
1913
1914 //=============================================================================
1915 #ifndef PRODUCT
1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1917 {
1918 Compile* C = ra_->C;
1919 if (generate_vzeroupper(C)) {
1920 st->print("vzeroupper");
1921 st->cr(); st->print("\t");
1922 }
1923
1924 int framesize = C->output()->frame_size_in_bytes();
1925 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove one word for the return address, which is already pushed,
// and one word for RBP.
1928 framesize -= 2*wordSize;
1929
1930 if (framesize) {
1931 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1932 st->print("\t");
1933 }
1934
1935 st->print_cr("popq rbp");
1936 if (do_polling() && C->is_method_compilation()) {
1937 st->print("\t");
1938 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1939 "ja #safepoint_stub\t"
1940 "# Safepoint: poll for GC");
1941 }
1942 }
1943 #endif
1944
1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1946 {
1947 Compile* C = ra_->C;
1948
1949 if (generate_vzeroupper(C)) {
1950 // Clear upper bits of YMM registers when current compiled code uses
1951 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1952 __ vzeroupper();
1953 }
1954
1955 // Subtract two words to account for return address and rbp
1956 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1957 __ remove_frame(initial_framesize, C->needs_stack_repair());
1958
1959 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1960 __ reserved_stack_check();
1961 }
1962
1963 if (do_polling() && C->is_method_compilation()) {
1964 Label dummy_label;
1965 Label* code_stub = &dummy_label;
1966 if (!C->output()->in_scratch_emit_size()) {
1967 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1968 C->output()->add_stub(stub);
1969 code_stub = &stub->entry();
1970 }
1971 __ relocate(relocInfo::poll_return_type);
1972 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1973 }
1974 }
1975
1976 int MachEpilogNode::reloc() const
1977 {
1978 return 2; // a large enough number
1979 }
1980
1981 const Pipeline* MachEpilogNode::pipeline() const
1982 {
1983 return MachNode::pipeline_class();
1984 }
1985
1986 //=============================================================================
1987
1988 enum RC {
1989 rc_bad,
1990 rc_int,
1991 rc_kreg,
1992 rc_float,
1993 rc_stack
1994 };
1995
1996 static enum RC rc_class(OptoReg::Name reg)
1997 {
if (!OptoReg::is_valid(reg)) return rc_bad;
1999
2000 if (OptoReg::is_stack(reg)) return rc_stack;
2001
2002 VMReg r = OptoReg::as_VMReg(reg);
2003
2004 if (r->is_Register()) return rc_int;
2005
2006 if (r->is_KRegister()) return rc_kreg;
2007
2008 assert(r->is_XMMRegister(), "must be");
2009 return rc_float;
2010 }
2011
// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
2013 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2014 int src_hi, int dst_hi, uint ireg, outputStream* st);
2015
2016 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2017 int stack_offset, int reg, uint ireg, outputStream* st);
2018
2019 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2020 int dst_offset, uint ireg, outputStream* st) {
2021 if (masm) {
2022 switch (ireg) {
2023 case Op_VecS:
2024 __ movq(Address(rsp, -8), rax);
2025 __ movl(rax, Address(rsp, src_offset));
2026 __ movl(Address(rsp, dst_offset), rax);
2027 __ movq(rax, Address(rsp, -8));
2028 break;
2029 case Op_VecD:
2030 __ pushq(Address(rsp, src_offset));
2031 __ popq (Address(rsp, dst_offset));
2032 break;
2033 case Op_VecX:
2034 __ pushq(Address(rsp, src_offset));
2035 __ popq (Address(rsp, dst_offset));
2036 __ pushq(Address(rsp, src_offset+8));
2037 __ popq (Address(rsp, dst_offset+8));
2038 break;
2039 case Op_VecY:
2040 __ vmovdqu(Address(rsp, -32), xmm0);
2041 __ vmovdqu(xmm0, Address(rsp, src_offset));
2042 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2043 __ vmovdqu(xmm0, Address(rsp, -32));
2044 break;
2045 case Op_VecZ:
2046 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2047 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2048 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2049 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2050 break;
2051 default:
2052 ShouldNotReachHere();
2053 }
2054 #ifndef PRODUCT
2055 } else {
2056 switch (ireg) {
2057 case Op_VecS:
2058 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2059 "movl rax, [rsp + #%d]\n\t"
2060 "movl [rsp + #%d], rax\n\t"
2061 "movq rax, [rsp - #8]",
2062 src_offset, dst_offset);
2063 break;
2064 case Op_VecD:
2065 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2066 "popq [rsp + #%d]",
2067 src_offset, dst_offset);
2068 break;
2069 case Op_VecX:
2070 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2071 "popq [rsp + #%d]\n\t"
2072 "pushq [rsp + #%d]\n\t"
2073 "popq [rsp + #%d]",
2074 src_offset, dst_offset, src_offset+8, dst_offset+8);
2075 break;
2076 case Op_VecY:
2077 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2078 "vmovdqu xmm0, [rsp + #%d]\n\t"
2079 "vmovdqu [rsp + #%d], xmm0\n\t"
2080 "vmovdqu xmm0, [rsp - #32]",
2081 src_offset, dst_offset);
2082 break;
2083 case Op_VecZ:
2084 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2085 "vmovdqu xmm0, [rsp + #%d]\n\t"
2086 "vmovdqu [rsp + #%d], xmm0\n\t"
2087 "vmovdqu xmm0, [rsp - #64]",
2088 src_offset, dst_offset);
2089 break;
2090 default:
2091 ShouldNotReachHere();
2092 }
2093 #endif
2094 }
2095 }
2096
2097 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2098 PhaseRegAlloc* ra_,
2099 bool do_size,
2100 outputStream* st) const {
2101 assert(masm != nullptr || st != nullptr, "sanity");
2102 // Get registers to move
2103 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2104 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2105 OptoReg::Name dst_second = ra_->get_reg_second(this);
2106 OptoReg::Name dst_first = ra_->get_reg_first(this);
2107
2108 enum RC src_second_rc = rc_class(src_second);
2109 enum RC src_first_rc = rc_class(src_first);
2110 enum RC dst_second_rc = rc_class(dst_second);
2111 enum RC dst_first_rc = rc_class(dst_first);
2112
2113 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2114 "must move at least 1 register" );
2115
2116 if (src_first == dst_first && src_second == dst_second) {
2117 // Self copy, no move
2118 return 0;
2119 }
2120 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2121 uint ireg = ideal_reg();
2122 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2123 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2125 // mem -> mem
2126 int src_offset = ra_->reg2offset(src_first);
2127 int dst_offset = ra_->reg2offset(dst_first);
2128 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
int stack_offset = ra_->reg2offset(dst_first);
vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
2135 int stack_offset = ra_->reg2offset(src_first);
2136 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2137 } else {
2138 ShouldNotReachHere();
2139 }
2140 return 0;
2141 }
2142 if (src_first_rc == rc_stack) {
2143 // mem ->
2144 if (dst_first_rc == rc_stack) {
2145 // mem -> mem
2146 assert(src_second != dst_first, "overlap");
2147 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2148 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2149 // 64-bit
2150 int src_offset = ra_->reg2offset(src_first);
2151 int dst_offset = ra_->reg2offset(dst_first);
2152 if (masm) {
2153 __ pushq(Address(rsp, src_offset));
2154 __ popq (Address(rsp, dst_offset));
2155 #ifndef PRODUCT
2156 } else {
2157 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2158 "popq [rsp + #%d]",
2159 src_offset, dst_offset);
2160 #endif
2161 }
2162 } else {
2163 // 32-bit
2164 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2165 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2166 // No pushl/popl, so:
2167 int src_offset = ra_->reg2offset(src_first);
2168 int dst_offset = ra_->reg2offset(dst_first);
2169 if (masm) {
2170 __ movq(Address(rsp, -8), rax);
2171 __ movl(rax, Address(rsp, src_offset));
2172 __ movl(Address(rsp, dst_offset), rax);
2173 __ movq(rax, Address(rsp, -8));
2174 #ifndef PRODUCT
2175 } else {
2176 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2177 "movl rax, [rsp + #%d]\n\t"
2178 "movl [rsp + #%d], rax\n\t"
2179 "movq rax, [rsp - #8]",
2180 src_offset, dst_offset);
2181 #endif
2182 }
2183 }
2184 return 0;
2185 } else if (dst_first_rc == rc_int) {
2186 // mem -> gpr
2187 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2188 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2189 // 64-bit
2190 int offset = ra_->reg2offset(src_first);
2191 if (masm) {
2192 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2193 #ifndef PRODUCT
2194 } else {
2195 st->print("movq %s, [rsp + #%d]\t# spill",
2196 Matcher::regName[dst_first],
2197 offset);
2198 #endif
2199 }
2200 } else {
2201 // 32-bit
2202 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2203 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2204 int offset = ra_->reg2offset(src_first);
2205 if (masm) {
2206 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2207 #ifndef PRODUCT
2208 } else {
2209 st->print("movl %s, [rsp + #%d]\t# spill",
2210 Matcher::regName[dst_first],
2211 offset);
2212 #endif
2213 }
2214 }
2215 return 0;
2216 } else if (dst_first_rc == rc_float) {
// mem -> xmm
2218 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2219 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2220 // 64-bit
2221 int offset = ra_->reg2offset(src_first);
2222 if (masm) {
2223 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2224 #ifndef PRODUCT
2225 } else {
2226 st->print("%s %s, [rsp + #%d]\t# spill",
2227 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2228 Matcher::regName[dst_first],
2229 offset);
2230 #endif
2231 }
2232 } else {
2233 // 32-bit
2234 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2235 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2236 int offset = ra_->reg2offset(src_first);
2237 if (masm) {
2238 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2239 #ifndef PRODUCT
2240 } else {
2241 st->print("movss %s, [rsp + #%d]\t# spill",
2242 Matcher::regName[dst_first],
2243 offset);
2244 #endif
2245 }
2246 }
2247 return 0;
2248 } else if (dst_first_rc == rc_kreg) {
2249 // mem -> kreg
2250 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2251 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2252 // 64-bit
2253 int offset = ra_->reg2offset(src_first);
2254 if (masm) {
2255 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2256 #ifndef PRODUCT
2257 } else {
2258 st->print("kmovq %s, [rsp + #%d]\t# spill",
2259 Matcher::regName[dst_first],
2260 offset);
2261 #endif
2262 }
2263 }
2264 return 0;
2265 }
2266 } else if (src_first_rc == rc_int) {
2267 // gpr ->
2268 if (dst_first_rc == rc_stack) {
2269 // gpr -> mem
2270 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2271 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2272 // 64-bit
2273 int offset = ra_->reg2offset(dst_first);
2274 if (masm) {
2275 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2276 #ifndef PRODUCT
2277 } else {
2278 st->print("movq [rsp + #%d], %s\t# spill",
2279 offset,
2280 Matcher::regName[src_first]);
2281 #endif
2282 }
2283 } else {
2284 // 32-bit
2285 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2286 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2287 int offset = ra_->reg2offset(dst_first);
2288 if (masm) {
2289 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2290 #ifndef PRODUCT
2291 } else {
2292 st->print("movl [rsp + #%d], %s\t# spill",
2293 offset,
2294 Matcher::regName[src_first]);
2295 #endif
2296 }
2297 }
2298 return 0;
2299 } else if (dst_first_rc == rc_int) {
2300 // gpr -> gpr
2301 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2302 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2303 // 64-bit
2304 if (masm) {
2305 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2306 as_Register(Matcher::_regEncode[src_first]));
2307 #ifndef PRODUCT
2308 } else {
2309 st->print("movq %s, %s\t# spill",
2310 Matcher::regName[dst_first],
2311 Matcher::regName[src_first]);
2312 #endif
2313 }
2314 return 0;
2315 } else {
2316 // 32-bit
2317 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2318 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2319 if (masm) {
2320 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2321 as_Register(Matcher::_regEncode[src_first]));
2322 #ifndef PRODUCT
2323 } else {
2324 st->print("movl %s, %s\t# spill",
2325 Matcher::regName[dst_first],
2326 Matcher::regName[src_first]);
2327 #endif
2328 }
2329 return 0;
2330 }
2331 } else if (dst_first_rc == rc_float) {
2332 // gpr -> xmm
2333 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2334 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2335 // 64-bit
2336 if (masm) {
2337 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2338 #ifndef PRODUCT
2339 } else {
2340 st->print("movdq %s, %s\t# spill",
2341 Matcher::regName[dst_first],
2342 Matcher::regName[src_first]);
2343 #endif
2344 }
2345 } else {
2346 // 32-bit
2347 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2348 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2349 if (masm) {
2350 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2351 #ifndef PRODUCT
2352 } else {
2353 st->print("movdl %s, %s\t# spill",
2354 Matcher::regName[dst_first],
2355 Matcher::regName[src_first]);
2356 #endif
2357 }
2358 }
2359 return 0;
2360 } else if (dst_first_rc == rc_kreg) {
2361 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2362 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2363 // 64-bit
2364 if (masm) {
2365 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2366 #ifndef PRODUCT
2367 } else {
2368 st->print("kmovq %s, %s\t# spill",
2369 Matcher::regName[dst_first],
2370 Matcher::regName[src_first]);
2371 #endif
2372 }
2373 }
2374 Unimplemented();
2375 return 0;
2376 }
2377 } else if (src_first_rc == rc_float) {
2378 // xmm ->
2379 if (dst_first_rc == rc_stack) {
2380 // xmm -> mem
2381 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2382 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2383 // 64-bit
2384 int offset = ra_->reg2offset(dst_first);
2385 if (masm) {
2386 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2387 #ifndef PRODUCT
2388 } else {
2389 st->print("movsd [rsp + #%d], %s\t# spill",
2390 offset,
2391 Matcher::regName[src_first]);
2392 #endif
2393 }
2394 } else {
2395 // 32-bit
2396 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2397 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2398 int offset = ra_->reg2offset(dst_first);
2399 if (masm) {
2400 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2401 #ifndef PRODUCT
2402 } else {
2403 st->print("movss [rsp + #%d], %s\t# spill",
2404 offset,
2405 Matcher::regName[src_first]);
2406 #endif
2407 }
2408 }
2409 return 0;
2410 } else if (dst_first_rc == rc_int) {
2411 // xmm -> gpr
2412 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2413 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2414 // 64-bit
2415 if (masm) {
2416 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2417 #ifndef PRODUCT
2418 } else {
2419 st->print("movdq %s, %s\t# spill",
2420 Matcher::regName[dst_first],
2421 Matcher::regName[src_first]);
2422 #endif
2423 }
2424 } else {
2425 // 32-bit
2426 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2427 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2428 if (masm) {
2429 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2430 #ifndef PRODUCT
2431 } else {
2432 st->print("movdl %s, %s\t# spill",
2433 Matcher::regName[dst_first],
2434 Matcher::regName[src_first]);
2435 #endif
2436 }
2437 }
2438 return 0;
2439 } else if (dst_first_rc == rc_float) {
2440 // xmm -> xmm
2441 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2442 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2443 // 64-bit
2444 if (masm) {
2445 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2446 #ifndef PRODUCT
2447 } else {
2448 st->print("%s %s, %s\t# spill",
2449 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2450 Matcher::regName[dst_first],
2451 Matcher::regName[src_first]);
2452 #endif
2453 }
2454 } else {
2455 // 32-bit
2456 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2457 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2458 if (masm) {
2459 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2460 #ifndef PRODUCT
2461 } else {
2462 st->print("%s %s, %s\t# spill",
2463 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2464 Matcher::regName[dst_first],
2465 Matcher::regName[src_first]);
2466 #endif
2467 }
2468 }
2469 return 0;
2470 } else if (dst_first_rc == rc_kreg) {
2471 assert(false, "Illegal spilling");
2472 return 0;
2473 }
} else if (src_first_rc == rc_kreg) {
// kreg ->
if (dst_first_rc == rc_stack) {
// kreg -> mem
2477 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2478 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2479 // 64-bit
2480 int offset = ra_->reg2offset(dst_first);
2481 if (masm) {
2482 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2483 #ifndef PRODUCT
2484 } else {
2485 st->print("kmovq [rsp + #%d] , %s\t# spill",
2486 offset,
2487 Matcher::regName[src_first]);
2488 #endif
2489 }
2490 }
2491 return 0;
2492 } else if (dst_first_rc == rc_int) {
2493 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2494 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2495 // 64-bit
2496 if (masm) {
2497 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2498 #ifndef PRODUCT
2499 } else {
2500 st->print("kmovq %s, %s\t# spill",
2501 Matcher::regName[dst_first],
2502 Matcher::regName[src_first]);
2503 #endif
2504 }
2505 }
2506 Unimplemented();
2507 return 0;
2508 } else if (dst_first_rc == rc_kreg) {
2509 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2510 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2511 // 64-bit
2512 if (masm) {
2513 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2514 #ifndef PRODUCT
2515 } else {
2516 st->print("kmovq %s, %s\t# spill",
2517 Matcher::regName[dst_first],
2518 Matcher::regName[src_first]);
2519 #endif
2520 }
2521 }
2522 return 0;
2523 } else if (dst_first_rc == rc_float) {
2524 assert(false, "Illegal spill");
2525 return 0;
2526 }
2527 }
2528
assert(false, "unhandled spill combination");
2530 Unimplemented();
2531 return 0;
2532 }
2533
2534 #ifndef PRODUCT
2535 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2536 implementation(nullptr, ra_, false, st);
2537 }
2538 #endif
2539
2540 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2541 implementation(masm, ra_, false, nullptr);
2542 }
2543
2544 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2545 return MachNode::size(ra_);
2546 }
2547
2548 //=============================================================================
2549 #ifndef PRODUCT
2550 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2551 {
2552 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2553 int reg = ra_->get_reg_first(this);
2554 st->print("leaq %s, [rsp + #%d]\t# box lock",
2555 Matcher::regName[reg], offset);
2556 }
2557 #endif
2558
2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2560 {
2561 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2562 int reg = ra_->get_encode(this);
2563
2564 __ lea(as_Register(reg), Address(rsp, offset));
2565 }
2566
2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2568 {
2569 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2570 if (ra_->get_encode(this) > 15) {
2571 return (offset < 0x80) ? 6 : 9; // REX2
2572 } else {
2573 return (offset < 0x80) ? 5 : 8; // REX
2574 }
2575 }
2576
2577 //=============================================================================
2578 #ifndef PRODUCT
2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2580 {
2581 st->print_cr("MachVEPNode");
2582 }
2583 #endif
2584
2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 CodeBuffer* cbuf = masm->code();
2588 uint insts_size = cbuf->insts_size();
2589 if (!_verified) {
2590 __ ic_check(1);
2591 } else {
2592 // TODO 8284443 Avoid creation of temporary frame
2593 if (ra_->C->stub_function() == nullptr) {
2594 __ verified_entry(ra_->C, 0);
2595 __ entry_barrier();
2596 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2597 __ remove_frame(initial_framesize, false);
2598 }
2599 // Unpack inline type args passed as oop and then jump to
2600 // the verified entry point (skipping the unverified entry).
2601 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2602 // Emit code for verified entry and save increment for stack repair on return
2603 __ verified_entry(ra_->C, sp_inc);
2604 if (Compile::current()->output()->in_scratch_emit_size()) {
2605 Label dummy_verified_entry;
2606 __ jmp(dummy_verified_entry);
2607 } else {
2608 __ jmp(*_verified_entry);
2609 }
2610 }
/* WARNING: these NOPs are critical so that the verified entry point is
properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
2613 int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
2614 nops_cnt &= 0x3; // Do not add nops if code is aligned.
2615 if (nops_cnt > 0) {
2616 __ nop(nops_cnt);
2617 }
2618 }
2619
2620 //=============================================================================
2621 #ifndef PRODUCT
2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2623 {
2624 if (UseCompressedClassPointers) {
2625 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2626 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2627 } else {
2628 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2629 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2630 }
2631 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2632 }
2633 #endif
2634
2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2636 {
2637 __ ic_check(InteriorEntryAlignment);
2638 }
2639
2640
2641 //=============================================================================
2642
2643 bool Matcher::supports_vector_calling_convention(void) {
2644 return EnableVectorSupport;
2645 }
2646
2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2648 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2649 }
2650
2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2652 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2653 }
2654
2655 #ifdef ASSERT
2656 static bool is_ndd_demotable(const MachNode* mdef) {
2657 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2658 }
2659 #endif
2660
2661 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2662 int oper_index) {
2663 if (mdef == nullptr) {
2664 return false;
2665 }
2666
2667 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2668 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2669 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2670 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2671 return false;
2672 }
2673
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any single address component
  // will not result in NDD demotion by the assembler.
2677 if (mdef->operand_num_edges(oper_index) != 1) {
2678 return false;
2679 }
2680
  // A demotion candidate must be register-mask compatible with the definition.
2682 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2683 if (!oper_mask.overlap(mdef->out_RegMask())) {
2684 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2685 return false;
2686 }
2687
2688 switch (oper_index) {
  // The first operand of a MachNode corresponding to an Intel APX NDD
  // selection pattern can share its assigned register with the definition
  // operand if their live ranges do not overlap. In that case the assembler
  // can demote the instruction to a legacy map0/map1 encoding, replacing its
  // 4-byte extended EVEX prefix with a shorter REX/REX2 prefix. Demotion
  // candidates are decorated with a special flag by the instruction selector.
2695 case 1:
2696 return is_ndd_demotable_opr1(mdef);
2697
  // The definition operand of a commutative operation can also be biased
  // towards the second operand.
2700 case 2:
2701 return is_ndd_demotable_opr2(mdef);
2702
  // The current scheme selects at most two biasing candidates.
2704 default:
2705 assert(false, "unhandled operand index: %s", mdef->Name());
2706 break;
2707 }
2708
2709 return false;
2710 }
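
// Illustrative sketch (the APX NDD mnemonics below are assumptions for
// exposition, not taken from this file):
//   eaddl r10d, r10d, r11d   ; EVEX NDD form: dst and first source share r10d
//   addl  r10d, r11d         ; legacy form the assembler can demote to,
//                            ; trading the 4-byte EVEX prefix for REX2
// Demotion is only legal when biasing has made dst coincide with src1 (or,
// for a commutative op, with src2), which is what the flags checked above track.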
2711
2712 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2713 assert(EnableVectorSupport, "sanity");
2714 int lo = XMM0_num;
2715 int hi = XMM0b_num;
2716 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2717 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2718 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2719 return OptoRegPair(hi, lo);
2720 }
2721
2722 // Is this branch offset short enough that a short branch can be used?
2723 //
2724 // NOTE: If the platform does not provide any short branch variants, then
2725 // this method should return false for offset 0.
2726 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2730 offset -= br_size;
2731
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller.
2734 if (rule == jmpConUCF2_rule)
2735 return (-126 <= offset && offset <= 125);
2736 return (-128 <= offset && offset <= 127);
2737 }
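
// Worked example (illustrative): a short conditional jump is 2 bytes, so for
// a branch whose target lies 100 bytes past the branch address the encoded
// rel8 displacement is 100 - 2 = 98, which fits in [-128, 127] and the short
// form applies; a target 200 bytes ahead (198 after adjustment) does not.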
2738
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
2744 bool Matcher::can_be_java_arg(int reg)
2745 {
2746 return
2747 reg == RDI_num || reg == RDI_H_num ||
2748 reg == RSI_num || reg == RSI_H_num ||
2749 reg == RDX_num || reg == RDX_H_num ||
2750 reg == RCX_num || reg == RCX_H_num ||
2751 reg == R8_num || reg == R8_H_num ||
2752 reg == R9_num || reg == R9_H_num ||
2753 reg == R12_num || reg == R12_H_num ||
2754 reg == XMM0_num || reg == XMM0b_num ||
2755 reg == XMM1_num || reg == XMM1b_num ||
2756 reg == XMM2_num || reg == XMM2b_num ||
2757 reg == XMM3_num || reg == XMM3b_num ||
2758 reg == XMM4_num || reg == XMM4b_num ||
2759 reg == XMM5_num || reg == XMM5b_num ||
2760 reg == XMM6_num || reg == XMM6b_num ||
2761 reg == XMM7_num || reg == XMM7b_num;
2762 }
2763
2764 bool Matcher::is_spillable_arg(int reg)
2765 {
2766 return can_be_java_arg(reg);
2767 }
2768
2769 uint Matcher::int_pressure_limit()
2770 {
2771 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2772 }
2773
2774 uint Matcher::float_pressure_limit()
2775 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
2778 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2779 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2780 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2781 }
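
// Illustrative arithmetic (the mask size is an assumption, not a value from
// this file): if _FLOAT_REG_mask.size() were 64, the default threshold would
// be 64 - 4 = 60 with EVEX and 64 - 2 = 62 without, leaving LCM a little
// headroom before it schedules right up against the real register limit.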
2782
2783 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the divisor is
  // constant is faster than the hardware DIV instruction (it uses
  // MulHiL).
2787 return false;
2788 }
2789
2790 // Register for DIVI projection of divmodI
2791 const RegMask& Matcher::divI_proj_mask() {
2792 return INT_RAX_REG_mask();
2793 }
2794
2795 // Register for MODI projection of divmodI
2796 const RegMask& Matcher::modI_proj_mask() {
2797 return INT_RDX_REG_mask();
2798 }
2799
2800 // Register for DIVL projection of divmodL
2801 const RegMask& Matcher::divL_proj_mask() {
2802 return LONG_RAX_REG_mask();
2803 }
2804
2805 // Register for MODL projection of divmodL
2806 const RegMask& Matcher::modL_proj_mask() {
2807 return LONG_RDX_REG_mask();
2808 }
2809
2810 %}
2811
2812 source_hpp %{
2813 // Header information of the source block.
2814 // Method declarations/definitions which are used outside
2815 // the ad-scope can conveniently be defined here.
2816 //
2817 // To keep related declarations/definitions/uses close together,
2818 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2819
2820 #include "runtime/vm_version.hpp"
2821
2822 class NativeJump;
2823
2824 class CallStubImpl {
2825
2826 //--------------------------------------------------------------
2827 //---< Used for optimization in Compile::shorten_branches >---
2828 //--------------------------------------------------------------
2829
2830 public:
2831 // Size of call trampoline stub.
2832 static uint size_call_trampoline() {
2833 return 0; // no call trampolines on this platform
2834 }
2835
2836 // number of relocations needed by a call trampoline stub
2837 static uint reloc_call_trampoline() {
2838 return 0; // no call trampolines on this platform
2839 }
2840 };
2841
2842 class HandlerImpl {
2843
2844 public:
2845
2846 static int emit_deopt_handler(C2_MacroAssembler* masm);
2847
2848 static uint size_deopt_handler() {
    // One 5-byte call (opcode + rel32) plus one 2-byte short jmp.
2850 return 7;
2851 }
2852 };
2853
2854 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2855 switch(bytes) {
2856 case 4: // fall-through
2857 case 8: // fall-through
2858 case 16: return Assembler::AVX_128bit;
2859 case 32: return Assembler::AVX_256bit;
2860 case 64: return Assembler::AVX_512bit;
2861
2862 default: {
2863 ShouldNotReachHere();
2864 return Assembler::AVX_NoVec;
2865 }
2866 }
2867 }
2868
2869 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2870 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2871 }
2872
2873 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2874 uint def_idx = use->operand_index(opnd);
2875 Node* def = use->in(def_idx);
2876 return vector_length_encoding(def);
2877 }
2878
2879 static inline bool is_vector_popcount_predicate(BasicType bt) {
2880 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2881 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2882 }
2883
2884 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2885 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2886 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2887 }
2888
2889 class Node::PD {
2890 public:
2891 enum NodeFlags : uint64_t {
2892 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2893 Flag_sets_carry_flag = Node::_last_flag << 2,
2894 Flag_sets_parity_flag = Node::_last_flag << 3,
2895 Flag_sets_zero_flag = Node::_last_flag << 4,
2896 Flag_sets_overflow_flag = Node::_last_flag << 5,
2897 Flag_sets_sign_flag = Node::_last_flag << 6,
2898 Flag_clears_carry_flag = Node::_last_flag << 7,
2899 Flag_clears_parity_flag = Node::_last_flag << 8,
2900 Flag_clears_zero_flag = Node::_last_flag << 9,
2901 Flag_clears_overflow_flag = Node::_last_flag << 10,
2902 Flag_clears_sign_flag = Node::_last_flag << 11,
2903 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2904 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2905 _last_flag = Flag_ndd_demotable_opr2
2906 };
2907 };
2908
2909 %} // end source_hpp
2910
2911 source %{
2912
2913 #include "opto/addnode.hpp"
2914 #include "c2_intelJccErratum_x86.hpp"
2915
2916 void PhaseOutput::pd_perform_mach_node_analysis() {
2917 if (VM_Version::has_intel_jcc_erratum()) {
2918 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2919 _buf_sizes._code += extra_padding;
2920 }
2921 }
2922
2923 int MachNode::pd_alignment_required() const {
2924 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2925 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2926 return IntelJccErratum::largest_jcc_size() + 1;
2927 } else {
2928 return 1;
2929 }
2930 }
2931
2932 int MachNode::compute_padding(int current_offset) const {
2933 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2934 Compile* C = Compile::current();
2935 PhaseOutput* output = C->output();
2936 Block* block = output->block();
2937 int index = output->index();
2938 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2939 } else {
2940 return 0;
2941 }
2942 }
2943
2944 // Emit deopt handler code.
2945 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2946
2947 // Note that the code buffer's insts_mark is always relative to insts.
2948 // That's why we must use the macroassembler to generate a handler.
2949 address base = __ start_a_stub(size_deopt_handler());
2950 if (base == nullptr) {
2951 ciEnv::current()->record_failure("CodeCache is full");
2952 return 0; // CodeBuffer::expand failed
2953 }
2954 int offset = __ offset();
2955
2956 Label start;
2957 __ bind(start);
2958
2959 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2960
2961 int entry_offset = __ offset();
2962
2963 __ jmp(start);
2964
2965 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2966 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2967 "out of bounds read in post-call NOP check");
2968 __ end_a_stub();
2969 return entry_offset;
2970 }
2971
2972 static Assembler::Width widthForType(BasicType bt) {
2973 if (bt == T_BYTE) {
2974 return Assembler::B;
2975 } else if (bt == T_SHORT) {
2976 return Assembler::W;
2977 } else if (bt == T_INT) {
2978 return Assembler::D;
2979 } else {
2980 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2981 return Assembler::Q;
2982 }
2983 }
2984
2985 //=============================================================================
2986
2987 // Float masks come from different places depending on platform.
2988 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2989 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2990 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2991 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2992 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2993 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2994 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2995 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2996 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2997 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2998 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2999 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
3000 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
3001 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
3002 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
3003 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
3004 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
3005 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
3006 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
3007
3008 //=============================================================================
3009 bool Matcher::match_rule_supported(int opcode) {
3010 if (!has_match_rule(opcode)) {
3011 return false; // no match rule present
3012 }
3013 switch (opcode) {
3014 case Op_AbsVL:
3015 case Op_StoreVectorScatter:
3016 if (UseAVX < 3) {
3017 return false;
3018 }
3019 break;
3020 case Op_PopCountI:
3021 case Op_PopCountL:
3022 if (!UsePopCountInstruction) {
3023 return false;
3024 }
3025 break;
3026 case Op_PopCountVI:
3027 if (UseAVX < 2) {
3028 return false;
3029 }
3030 break;
3031 case Op_CompressV:
3032 case Op_ExpandV:
3033 case Op_PopCountVL:
3034 if (UseAVX < 2) {
3035 return false;
3036 }
3037 break;
3038 case Op_MulVI:
3039 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3040 return false;
3041 }
3042 break;
3043 case Op_MulVL:
3044 if (UseSSE < 4) { // only with SSE4_1 or AVX
3045 return false;
3046 }
3047 break;
3048 case Op_MulReductionVL:
3049 if (VM_Version::supports_avx512dq() == false) {
3050 return false;
3051 }
3052 break;
3053 case Op_AbsVB:
3054 case Op_AbsVS:
3055 case Op_AbsVI:
3056 case Op_AddReductionVI:
3057 case Op_AndReductionV:
3058 case Op_OrReductionV:
3059 case Op_XorReductionV:
3060 if (UseSSE < 3) { // requires at least SSSE3
3061 return false;
3062 }
3063 break;
3064 case Op_MaxHF:
3065 case Op_MinHF:
3066 if (!VM_Version::supports_avx512vlbw()) {
3067 return false;
3068 } // fallthrough
3069 case Op_AddHF:
3070 case Op_DivHF:
3071 case Op_FmaHF:
3072 case Op_MulHF:
3073 case Op_ReinterpretS2HF:
3074 case Op_ReinterpretHF2S:
3075 case Op_SubHF:
3076 case Op_SqrtHF:
3077 if (!VM_Version::supports_avx512_fp16()) {
3078 return false;
3079 }
3080 break;
3081 case Op_VectorLoadShuffle:
3082 case Op_VectorRearrange:
3083 case Op_MulReductionVI:
3084 if (UseSSE < 4) { // requires at least SSE4
3085 return false;
3086 }
3087 break;
3088 case Op_IsInfiniteF:
3089 case Op_IsInfiniteD:
3090 if (!VM_Version::supports_avx512dq()) {
3091 return false;
3092 }
3093 break;
3094 case Op_SqrtVD:
3095 case Op_SqrtVF:
3096 case Op_VectorMaskCmp:
3097 case Op_VectorCastB2X:
3098 case Op_VectorCastS2X:
3099 case Op_VectorCastI2X:
3100 case Op_VectorCastL2X:
3101 case Op_VectorCastF2X:
3102 case Op_VectorCastD2X:
3103 case Op_VectorUCastB2X:
3104 case Op_VectorUCastS2X:
3105 case Op_VectorUCastI2X:
3106 case Op_VectorMaskCast:
3107 if (UseAVX < 1) { // enabled for AVX only
3108 return false;
3109 }
3110 break;
3111 case Op_PopulateIndex:
3112 if (UseAVX < 2) {
3113 return false;
3114 }
3115 break;
3116 case Op_RoundVF:
3117 if (UseAVX < 2) { // enabled for AVX2 only
3118 return false;
3119 }
3120 break;
3121 case Op_RoundVD:
3122 if (UseAVX < 3) {
3123 return false; // enabled for AVX3 only
3124 }
3125 break;
3126 case Op_CompareAndSwapL:
3127 case Op_CompareAndSwapP:
3128 break;
3129 case Op_StrIndexOf:
3130 if (!UseSSE42Intrinsics) {
3131 return false;
3132 }
3133 break;
3134 case Op_StrIndexOfChar:
3135 if (!UseSSE42Intrinsics) {
3136 return false;
3137 }
3138 break;
3139 case Op_OnSpinWait:
3140 if (VM_Version::supports_on_spin_wait() == false) {
3141 return false;
3142 }
3143 break;
3144 case Op_MulVB:
3145 case Op_LShiftVB:
3146 case Op_RShiftVB:
3147 case Op_URShiftVB:
3148 case Op_VectorInsert:
3149 case Op_VectorLoadMask:
3150 case Op_VectorStoreMask:
3151 case Op_VectorBlend:
3152 if (UseSSE < 4) {
3153 return false;
3154 }
3155 break;
3156 case Op_MaxD:
3157 case Op_MaxF:
3158 case Op_MinD:
3159 case Op_MinF:
3160 if (UseAVX < 1) { // enabled for AVX only
3161 return false;
3162 }
3163 break;
3164 case Op_CacheWB:
3165 case Op_CacheWBPreSync:
3166 case Op_CacheWBPostSync:
3167 if (!VM_Version::supports_data_cache_line_flush()) {
3168 return false;
3169 }
3170 break;
3171 case Op_ExtractB:
3172 case Op_ExtractL:
3173 case Op_ExtractI:
3174 case Op_RoundDoubleMode:
3175 if (UseSSE < 4) {
3176 return false;
3177 }
3178 break;
3179 case Op_RoundDoubleModeV:
3180 if (VM_Version::supports_avx() == false) {
3181 return false; // 128bit vroundpd is not available
3182 }
3183 break;
3184 case Op_LoadVectorGather:
3185 case Op_LoadVectorGatherMasked:
3186 if (UseAVX < 2) {
3187 return false;
3188 }
3189 break;
3190 case Op_FmaF:
3191 case Op_FmaD:
3192 case Op_FmaVD:
3193 case Op_FmaVF:
3194 if (!UseFMA) {
3195 return false;
3196 }
3197 break;
3198 case Op_MacroLogicV:
3199 if (UseAVX < 3 || !UseVectorMacroLogic) {
3200 return false;
3201 }
3202 break;
3203
3204 case Op_VectorCmpMasked:
3205 case Op_VectorMaskGen:
3206 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3207 return false;
3208 }
3209 break;
3210 case Op_VectorMaskFirstTrue:
3211 case Op_VectorMaskLastTrue:
3212 case Op_VectorMaskTrueCount:
3213 case Op_VectorMaskToLong:
3214 if (UseAVX < 1) {
3215 return false;
3216 }
3217 break;
3218 case Op_RoundF:
3219 case Op_RoundD:
3220 break;
3221 case Op_CopySignD:
3222 case Op_CopySignF:
3223 if (UseAVX < 3) {
3224 return false;
3225 }
3226 if (!VM_Version::supports_avx512vl()) {
3227 return false;
3228 }
3229 break;
3230 case Op_CompressBits:
3231 case Op_ExpandBits:
3232 if (!VM_Version::supports_bmi2()) {
3233 return false;
3234 }
3235 break;
3236 case Op_CompressM:
3237 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3238 return false;
3239 }
3240 break;
3241 case Op_ConvF2HF:
3242 case Op_ConvHF2F:
3243 if (!VM_Version::supports_float16()) {
3244 return false;
3245 }
3246 break;
3247 case Op_VectorCastF2HF:
3248 case Op_VectorCastHF2F:
3249 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3250 return false;
3251 }
3252 break;
3253 }
3254 return true; // Match rules are supported by default.
3255 }
3256
3257 //------------------------------------------------------------------------
3258
3259 static inline bool is_pop_count_instr_target(BasicType bt) {
3260 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3261 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3262 }
3263
3264 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3265 return match_rule_supported_vector(opcode, vlen, bt);
3266 }
3267
3268 // Identify extra cases that we might want to provide match rules for vector nodes and
3269 // other intrinsics guarded with vector length (vlen) and element type (bt).
3270 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3271 if (!match_rule_supported(opcode)) {
3272 return false;
3273 }
3274 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3275 // * SSE2 supports 128bit vectors for all types;
3276 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3277 // * AVX2 supports 256bit vectors for all types;
3278 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3279 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3280 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3281 // And MaxVectorSize is taken into account as well.
3282 if (!vector_size_supported(bt, vlen)) {
3283 return false;
3284 }
3285 // Special cases which require vector length follow:
3286 // * implementation limitations
3287 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3288 // * 128bit vroundpd instruction is present only in AVX1
3289 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
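  // For example, 8 T_INT elements give 8 * 4 * 8 = 256 bits, and
  // 64 T_BYTE elements give 64 * 1 * 8 = 512 bits.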
3290 switch (opcode) {
3291 case Op_MaxVHF:
3292 case Op_MinVHF:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      } // fallthrough
3296 case Op_AddVHF:
3297 case Op_DivVHF:
3298 case Op_FmaVHF:
3299 case Op_MulVHF:
3300 case Op_SubVHF:
3301 case Op_SqrtVHF:
3302 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3303 return false;
3304 }
3305 if (!VM_Version::supports_avx512_fp16()) {
3306 return false;
3307 }
3308 break;
3309 case Op_AbsVF:
3310 case Op_NegVF:
3311 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3312 return false; // 512bit vandps and vxorps are not available
3313 }
3314 break;
3315 case Op_AbsVD:
3316 case Op_NegVD:
3317 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3318 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3319 }
3320 break;
3321 case Op_RotateRightV:
3322 case Op_RotateLeftV:
3323 if (bt != T_INT && bt != T_LONG) {
3324 return false;
3325 } // fallthrough
3326 case Op_MacroLogicV:
3327 if (!VM_Version::supports_evex() ||
3328 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3329 return false;
3330 }
3331 break;
3332 case Op_ClearArray:
3333 case Op_VectorMaskGen:
3334 case Op_VectorCmpMasked:
3335 if (!VM_Version::supports_avx512bw()) {
3336 return false;
3337 }
3338 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3339 return false;
3340 }
3341 break;
3342 case Op_LoadVectorMasked:
3343 case Op_StoreVectorMasked:
3344 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3345 return false;
3346 }
3347 break;
3348 case Op_UMinV:
3349 case Op_UMaxV:
3350 if (UseAVX == 0) {
3351 return false;
3352 }
3353 break;
3354 case Op_MaxV:
3355 case Op_MinV:
3356 if (UseSSE < 4 && is_integral_type(bt)) {
3357 return false;
3358 }
3359 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3360 // Float/Double intrinsics are enabled for AVX family currently.
3361 if (UseAVX == 0) {
3362 return false;
3363 }
3364 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3365 return false;
3366 }
3367 }
3368 break;
3369 case Op_CallLeafVector:
3370 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3371 return false;
3372 }
3373 break;
3374 case Op_AddReductionVI:
3375 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3376 return false;
3377 }
3378 // fallthrough
3379 case Op_AndReductionV:
3380 case Op_OrReductionV:
3381 case Op_XorReductionV:
3382 if (is_subword_type(bt) && (UseSSE < 4)) {
3383 return false;
3384 }
3385 break;
3386 case Op_MinReductionV:
3387 case Op_MaxReductionV:
3388 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3389 return false;
3390 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3391 return false;
3392 }
3393 // Float/Double intrinsics enabled for AVX family.
3394 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3395 return false;
3396 }
3397 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3398 return false;
3399 }
3400 break;
3401 case Op_VectorBlend:
3402 if (UseAVX == 0 && size_in_bits < 128) {
3403 return false;
3404 }
3405 break;
3406 case Op_VectorTest:
3407 if (UseSSE < 4) {
3408 return false; // Implementation limitation
3409 } else if (size_in_bits < 32) {
3410 return false; // Implementation limitation
3411 }
3412 break;
3413 case Op_VectorLoadShuffle:
3414 case Op_VectorRearrange:
      if (vlen == 2) {
3416 return false; // Implementation limitation due to how shuffle is loaded
3417 } else if (size_in_bits == 256 && UseAVX < 2) {
3418 return false; // Implementation limitation
3419 }
3420 break;
3421 case Op_VectorLoadMask:
3422 case Op_VectorMaskCast:
3423 if (size_in_bits == 256 && UseAVX < 2) {
3424 return false; // Implementation limitation
3425 }
3426 // fallthrough
3427 case Op_VectorStoreMask:
3428 if (vlen == 2) {
3429 return false; // Implementation limitation
3430 }
3431 break;
3432 case Op_PopulateIndex:
3433 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3434 return false;
3435 }
3436 break;
3437 case Op_VectorCastB2X:
3438 case Op_VectorCastS2X:
3439 case Op_VectorCastI2X:
3440 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3441 return false;
3442 }
3443 break;
3444 case Op_VectorCastL2X:
3445 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3446 return false;
3447 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3448 return false;
3449 }
3450 break;
3451 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types
      // happen after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
3455 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3456 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3457 return false;
3458 }
3459 }
3460 // fallthrough
3461 case Op_VectorCastD2X:
3462 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3463 return false;
3464 }
3465 break;
3466 case Op_VectorCastF2HF:
3467 case Op_VectorCastHF2F:
3468 if (!VM_Version::supports_f16c() &&
3469 ((!VM_Version::supports_evex() ||
3470 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3471 return false;
3472 }
3473 break;
3474 case Op_RoundVD:
3475 if (!VM_Version::supports_avx512dq()) {
3476 return false;
3477 }
3478 break;
3479 case Op_MulReductionVI:
3480 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3481 return false;
3482 }
3483 break;
3484 case Op_LoadVectorGatherMasked:
3485 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3486 return false;
3487 }
3488 if (is_subword_type(bt) &&
3489 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3490 (size_in_bits < 64) ||
3491 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3492 return false;
3493 }
3494 break;
3495 case Op_StoreVectorScatterMasked:
3496 case Op_StoreVectorScatter:
3497 if (is_subword_type(bt)) {
3498 return false;
3499 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3500 return false;
3501 }
3502 // fallthrough
3503 case Op_LoadVectorGather:
3504 if (!is_subword_type(bt) && size_in_bits == 64) {
3505 return false;
3506 }
3507 if (is_subword_type(bt) && size_in_bits < 64) {
3508 return false;
3509 }
3510 break;
3511 case Op_SaturatingAddV:
3512 case Op_SaturatingSubV:
3513 if (UseAVX < 1) {
3514 return false; // Implementation limitation
3515 }
3516 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3517 return false;
3518 }
3519 break;
3520 case Op_SelectFromTwoVector:
3521 if (size_in_bits < 128) {
3522 return false;
3523 }
3524 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3525 return false;
3526 }
3527 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3528 return false;
3529 }
3530 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3531 return false;
3532 }
3533 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3534 return false;
3535 }
3536 break;
3537 case Op_MaskAll:
3538 if (!VM_Version::supports_evex()) {
3539 return false;
3540 }
3541 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3542 return false;
3543 }
3544 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3545 return false;
3546 }
3547 break;
3548 case Op_VectorMaskCmp:
3549 if (vlen < 2 || size_in_bits < 32) {
3550 return false;
3551 }
3552 break;
3553 case Op_CompressM:
3554 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3555 return false;
3556 }
3557 break;
3558 case Op_CompressV:
3559 case Op_ExpandV:
3560 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3561 return false;
3562 }
      if (size_in_bits < 128) {
        return false;
      }
      break;
3566 case Op_VectorLongToMask:
3567 if (UseAVX < 1) {
3568 return false;
3569 }
3570 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3571 return false;
3572 }
3573 break;
3574 case Op_SignumVD:
3575 case Op_SignumVF:
3576 if (UseAVX < 1) {
3577 return false;
3578 }
3579 break;
3580 case Op_PopCountVI:
3581 case Op_PopCountVL: {
3582 if (!is_pop_count_instr_target(bt) &&
3583 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3584 return false;
3585 }
3586 }
3587 break;
3588 case Op_ReverseV:
3589 case Op_ReverseBytesV:
3590 if (UseAVX < 2) {
3591 return false;
3592 }
3593 break;
3594 case Op_CountTrailingZerosV:
3595 case Op_CountLeadingZerosV:
3596 if (UseAVX < 2) {
3597 return false;
3598 }
3599 break;
3600 }
3601 return true; // Per default match rules are supported.
3602 }
3603
3604 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator. This routine does a strict check on the existence
  // of masked operation patterns by returning false for all opcodes other than
  // the ones whose masked instruction patterns are defined in this file.
3611 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3612 return false;
3613 }
3614
3615 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3616 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3617 return false;
3618 }
3619 switch(opcode) {
3620 // Unary masked operations
3621 case Op_AbsVB:
3622 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      } // fallthrough
3626 case Op_AbsVI:
3627 case Op_AbsVL:
3628 return true;
3629
3630 // Ternary masked operations
3631 case Op_FmaVF:
3632 case Op_FmaVD:
3633 return true;
3634
3635 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3637 return false;
3638 }
3639 return true;
3640
3641 // Binary masked operations
3642 case Op_AddVB:
3643 case Op_AddVS:
3644 case Op_SubVB:
3645 case Op_SubVS:
3646 case Op_MulVS:
3647 case Op_LShiftVS:
3648 case Op_RShiftVS:
3649 case Op_URShiftVS:
3650 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3651 if (!VM_Version::supports_avx512bw()) {
3652 return false; // Implementation limitation
3653 }
3654 return true;
3655
3656 case Op_MulVL:
3657 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3658 if (!VM_Version::supports_avx512dq()) {
3659 return false; // Implementation limitation
3660 }
3661 return true;
3662
3663 case Op_AndV:
3664 case Op_OrV:
3665 case Op_XorV:
3666 case Op_RotateRightV:
3667 case Op_RotateLeftV:
3668 if (bt != T_INT && bt != T_LONG) {
3669 return false; // Implementation limitation
3670 }
3671 return true;
3672
3673 case Op_VectorLoadMask:
3674 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3675 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3676 return false;
3677 }
3678 return true;
3679
3680 case Op_AddVI:
3681 case Op_AddVL:
3682 case Op_AddVF:
3683 case Op_AddVD:
3684 case Op_SubVI:
3685 case Op_SubVL:
3686 case Op_SubVF:
3687 case Op_SubVD:
3688 case Op_MulVI:
3689 case Op_MulVF:
3690 case Op_MulVD:
3691 case Op_DivVF:
3692 case Op_DivVD:
3693 case Op_SqrtVF:
3694 case Op_SqrtVD:
3695 case Op_LShiftVI:
3696 case Op_LShiftVL:
3697 case Op_RShiftVI:
3698 case Op_RShiftVL:
3699 case Op_URShiftVI:
3700 case Op_URShiftVL:
3701 case Op_LoadVectorMasked:
3702 case Op_StoreVectorMasked:
3703 case Op_LoadVectorGatherMasked:
3704 case Op_StoreVectorScatterMasked:
3705 return true;
3706
3707 case Op_UMinV:
3708 case Op_UMaxV:
3709 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3710 return false;
3711 } // fallthrough
3712 case Op_MaxV:
3713 case Op_MinV:
3714 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3715 return false; // Implementation limitation
3716 }
3717 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3718 return false; // Implementation limitation
3719 }
3720 return true;
3721 case Op_SaturatingAddV:
3722 case Op_SaturatingSubV:
3723 if (!is_subword_type(bt)) {
3724 return false;
3725 }
3726 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3727 return false; // Implementation limitation
3728 }
3729 return true;
3730
3731 case Op_VectorMaskCmp:
3732 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3733 return false; // Implementation limitation
3734 }
3735 return true;
3736
3737 case Op_VectorRearrange:
3738 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3739 return false; // Implementation limitation
3740 }
3741 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3742 return false; // Implementation limitation
3743 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3744 return false; // Implementation limitation
3745 }
3746 return true;
3747
3748 // Binary Logical operations
3749 case Op_AndVMask:
3750 case Op_OrVMask:
3751 case Op_XorVMask:
3752 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3753 return false; // Implementation limitation
3754 }
3755 return true;
3756
3757 case Op_PopCountVI:
3758 case Op_PopCountVL:
3759 if (!is_pop_count_instr_target(bt)) {
3760 return false;
3761 }
3762 return true;
3763
3764 case Op_MaskAll:
3765 return true;
3766
3767 case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      }
      // fallthrough
3771 default:
3772 return false;
3773 }
3774 }
3775
3776 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3777 return false;
3778 }
3779
3780 // Return true if Vector::rearrange needs preparation of the shuffle argument
3781 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3782 switch (elem_bt) {
3783 case T_BYTE: return false;
3784 case T_SHORT: return !VM_Version::supports_avx512bw();
3785 case T_INT: return !VM_Version::supports_avx();
3786 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3787 default:
3788 ShouldNotReachHere();
3789 return false;
3790 }
3791 }
3792
3793 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3794 // Prefer predicate if the mask type is "TypeVectMask".
3795 return vt->isa_vectmask() != nullptr;
3796 }
3797
3798 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3799 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3800 bool legacy = (generic_opnd->opcode() == LEGVEC);
3801 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3802 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3803 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3804 return new legVecZOper();
3805 }
3806 if (legacy) {
3807 switch (ideal_reg) {
3808 case Op_VecS: return new legVecSOper();
3809 case Op_VecD: return new legVecDOper();
3810 case Op_VecX: return new legVecXOper();
3811 case Op_VecY: return new legVecYOper();
3812 case Op_VecZ: return new legVecZOper();
3813 }
3814 } else {
3815 switch (ideal_reg) {
3816 case Op_VecS: return new vecSOper();
3817 case Op_VecD: return new vecDOper();
3818 case Op_VecX: return new vecXOper();
3819 case Op_VecY: return new vecYOper();
3820 case Op_VecZ: return new vecZOper();
3821 }
3822 }
3823 ShouldNotReachHere();
3824 return nullptr;
3825 }
3826
3827 bool Matcher::is_reg2reg_move(MachNode* m) {
3828 switch (m->rule()) {
3829 case MoveVec2Leg_rule:
3830 case MoveLeg2Vec_rule:
3831 case MoveF2VL_rule:
3832 case MoveF2LEG_rule:
3833 case MoveVL2F_rule:
3834 case MoveLEG2F_rule:
3835 case MoveD2VL_rule:
3836 case MoveD2LEG_rule:
3837 case MoveVL2D_rule:
3838 case MoveLEG2D_rule:
3839 return true;
3840 default:
3841 return false;
3842 }
3843 }
3844
3845 bool Matcher::is_generic_vector(MachOper* opnd) {
3846 switch (opnd->opcode()) {
3847 case VEC:
3848 case LEGVEC:
3849 return true;
3850 default:
3851 return false;
3852 }
3853 }
3854
3855 //------------------------------------------------------------------------
3856
3857 const RegMask* Matcher::predicate_reg_mask(void) {
3858 return &_VECTMASK_REG_mask;
3859 }
3860
3861 // Max vector size in bytes. 0 if not supported.
3862 int Matcher::vector_width_in_bytes(BasicType bt) {
3863 assert(is_java_primitive(bt), "only primitive type vectors");
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX-512 (EVEX) supports 512bit vectors for all types.
3867 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3868 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3869 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3870 size = (UseAVX > 2) ? 64 : 32;
3871 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3872 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3873 // Use flag to limit vector size.
3874 size = MIN2(size,(int)MaxVectorSize);
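  // Worked example: UseAVX == 2 gives size = (1 << 2) * 8 = 32 bytes (256 bits);
  // UseAVX == 3 gives 64 bytes, reduced to 32 for subword types without
  // AVX512BW, and finally clamped to MaxVectorSize.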
3875 // Minimum 2 values in vector (or 4 for bytes).
3876 switch (bt) {
3877 case T_DOUBLE:
3878 case T_LONG:
3879 if (size < 16) return 0;
3880 break;
3881 case T_FLOAT:
3882 case T_INT:
3883 if (size < 8) return 0;
3884 break;
3885 case T_BOOLEAN:
3886 if (size < 4) return 0;
3887 break;
3888 case T_CHAR:
3889 if (size < 4) return 0;
3890 break;
3891 case T_BYTE:
3892 if (size < 4) return 0;
3893 break;
3894 case T_SHORT:
3895 if (size < 4) return 0;
3896 break;
3897 default:
3898 ShouldNotReachHere();
3899 }
3900 return size;
3901 }
3902
3903 // Limits on vector size (number of elements) loaded into vector.
3904 int Matcher::max_vector_size(const BasicType bt) {
3905 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3906 }
3907 int Matcher::min_vector_size(const BasicType bt) {
3908 int max_size = max_vector_size(bt);
  // The minimum size that can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support for calling SVML double64 vector stubs.
  if (bt == T_DOUBLE) {
    size = 1;
  }
3914 }
3915 return MIN2(size,max_size);
3916 }
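
// For example, min_vector_size(T_BYTE) is 4 elements, min_vector_size(T_INT)
// is 2, and min_vector_size(T_DOUBLE) is 1 to support SVML double64 calls.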
3917
3918 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3919 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3920 // by default on Cascade Lake
3921 if (VM_Version::is_default_intel_cascade_lake()) {
3922 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3923 }
3924 return Matcher::max_vector_size(bt);
3925 }
3926
3927 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3928 return -1;
3929 }
3930
3931 // Vector ideal reg corresponding to specified size in bytes
3932 uint Matcher::vector_ideal_reg(int size) {
3933 assert(MaxVectorSize >= size, "");
3934 switch(size) {
3935 case 4: return Op_VecS;
3936 case 8: return Op_VecD;
3937 case 16: return Op_VecX;
3938 case 32: return Op_VecY;
3939 case 64: return Op_VecZ;
3940 }
3941 ShouldNotReachHere();
3942 return 0;
3943 }
3944
3945 // Check for shift by small constant as well
3946 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3947 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3948 shift->in(2)->get_int() <= 3 &&
3949 // Are there other uses besides address expressions?
3950 !matcher->is_visited(shift)) {
3951 address_visited.set(shift->_idx); // Flag as address_visited
3952 mstack.push(shift->in(2), Matcher::Visit);
3953 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
3957 if (conv->Opcode() == Op_ConvI2L &&
3958 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3959 // Are there other uses besides address expressions?
3960 !matcher->is_visited(conv)) {
3961 address_visited.set(conv->_idx); // Flag as address_visited
3962 mstack.push(conv->in(1), Matcher::Pre_Visit);
3963 } else {
3964 mstack.push(conv, Matcher::Pre_Visit);
3965 }
3966 return true;
3967 }
3968 return false;
3969 }
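
// Illustrative example: for an address like base + ((long)i << 3), the
// (LShiftX (ConvI2L i) 3) subtree is cloned at each address use so that it
// can be subsumed into a scaled addressing mode, e.g. [base + idx*8],
// instead of being computed into a separate register.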
3970
// This function identifies sub-graphs in which a 'load' node is an input
// to two different nodes, such that the sub-graph can be matched with BMI
// instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
3977 //
3978 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3979 // This is a temporary solution until we make DAGs expressible in ADL.
3980 template<typename ConType>
3981 class FusedPatternMatcher {
3982 Node* _op1_node;
3983 Node* _mop_node;
3984 int _con_op;
3985
3986 static int match_next(Node* n, int next_op, int next_op_idx) {
3987 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3988 return -1;
3989 }
3990
3991 if (next_op_idx == -1) { // n is commutative, try rotations
3992 if (n->in(1)->Opcode() == next_op) {
3993 return 1;
3994 } else if (n->in(2)->Opcode() == next_op) {
3995 return 2;
3996 }
3997 } else {
3998 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3999 if (n->in(next_op_idx)->Opcode() == next_op) {
4000 return next_op_idx;
4001 }
4002 }
4003 return -1;
4004 }
4005
4006 public:
4007 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4008 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4009
4010 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4011 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4012 typename ConType::NativeType con_value) {
4013 if (_op1_node->Opcode() != op1) {
4014 return false;
4015 }
4016 if (_mop_node->outcnt() > 2) {
4017 return false;
4018 }
4019 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4020 if (op1_op2_idx == -1) {
4021 return false;
4022 }
4023 // Memory operation must be the other edge
4024 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4025
4026 // Check that the mop node is really what we want
4027 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4028 Node* op2_node = _op1_node->in(op1_op2_idx);
4029 if (op2_node->outcnt() > 1) {
4030 return false;
4031 }
4032 assert(op2_node->Opcode() == op2, "Should be");
4033 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4034 if (op2_con_idx == -1) {
4035 return false;
4036 }
4037 // Memory operation must be the other edge
4038 int op2_mop_idx = (op2_con_idx & 1) + 1;
4039 // Check that the memory operation is the same node
4040 if (op2_node->in(op2_mop_idx) == _mop_node) {
4041 // Now check the constant
4042 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4043 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4044 return true;
4045 }
4046 }
4047 }
4048 return false;
4049 }
4050 };
4051
4052 static bool is_bmi_pattern(Node* n, Node* m) {
4053 assert(UseBMI1Instructions, "sanity");
4054 if (n != nullptr && m != nullptr) {
4055 if (m->Opcode() == Op_LoadI) {
4056 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4057 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4058 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4059 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4060 } else if (m->Opcode() == Op_LoadL) {
4061 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4062 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4063 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4064 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4065 }
4066 }
4067 return false;
4068 }
4069
4070 // Should the matcher clone input 'm' of node 'n'?
4071 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4072 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4073 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4074 mstack.push(m, Visit);
4075 return true;
4076 }
4077 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4078 mstack.push(m, Visit); // m = ShiftCntV
4079 return true;
4080 }
4081 if (is_encode_and_store_pattern(n, m)) {
4082 mstack.push(m, Visit);
4083 return true;
4084 }
4085 return false;
4086 }
4087
4088 // Should the Matcher clone shifts on addressing modes, expecting them
4089 // to be subsumed into complex addressing expressions or compute them
4090 // into registers?
4091 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4092 Node *off = m->in(AddPNode::Offset);
4093 if (off->is_Con()) {
4094 address_visited.test_set(m->_idx); // Flag as address_visited
4095 Node *adr = m->in(AddPNode::Address);
4096
4097 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4098 // AtomicAdd is not an addressing expression.
4099 // Cheap to find it by looking for screwy base.
4100 if (adr->is_AddP() &&
4101 !adr->in(AddPNode::Base)->is_top() &&
4102 !adr->in(AddPNode::Offset)->is_Con() &&
4103 off->get_long() == (int) (off->get_long()) && // immL32
4104 // Are there other uses besides address expressions?
4105 !is_visited(adr)) {
4106 address_visited.set(adr->_idx); // Flag as address_visited
4107 Node *shift = adr->in(AddPNode::Offset);
4108 if (!clone_shift(shift, this, mstack, address_visited)) {
4109 mstack.push(shift, Pre_Visit);
4110 }
4111 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4112 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4113 } else {
4114 mstack.push(adr, Pre_Visit);
4115 }
4116
4117 // Clone X+offset as it also folds into most addressing expressions
4118 mstack.push(off, Visit);
4119 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4120 return true;
4121 } else if (clone_shift(off, this, mstack, address_visited)) {
4122 address_visited.test_set(m->_idx); // Flag as address_visited
4123 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4124 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4125 return true;
4126 }
4127 return false;
4128 }
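
// Illustrative example: for p = base + ((long)i << 2) + 16, both the
// constant offset and the shifted index are cloned at each memory user so
// matching can fold the whole computation into one addressing mode,
// [base + idx*4 + 16], rather than keeping a live register for the sum.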
4129
4130 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4131 switch (bt) {
4132 case BoolTest::eq:
4133 return Assembler::eq;
4134 case BoolTest::ne:
4135 return Assembler::neq;
4136 case BoolTest::le:
4137 case BoolTest::ule:
4138 return Assembler::le;
4139 case BoolTest::ge:
4140 case BoolTest::uge:
4141 return Assembler::nlt;
4142 case BoolTest::lt:
4143 case BoolTest::ult:
4144 return Assembler::lt;
4145 case BoolTest::gt:
4146 case BoolTest::ugt:
4147 return Assembler::nle;
4148 default : ShouldNotReachHere(); return Assembler::_false;
4149 }
4150 }
4151
4152 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4153 switch (bt) {
4154 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4155 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4156 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4157 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4158 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4159 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4160 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4161 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4162 }
4163 }
4164
4165 // Helper methods for MachSpillCopyNode::implementation().
4166 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4167 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4168 assert(ireg == Op_VecS || // 32bit vector
4169 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4170 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4171 "no non-adjacent vector moves" );
4172 if (masm) {
4173 switch (ireg) {
4174 case Op_VecS: // copy whole register
4175 case Op_VecD:
4176 case Op_VecX:
4177 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4178 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4179 } else {
4180 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4181 }
4182 break;
4183 case Op_VecY:
4184 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4185 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4186 } else {
4187 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4188 }
4189 break;
4190 case Op_VecZ:
4191 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4192 break;
4193 default:
4194 ShouldNotReachHere();
4195 }
4196 #ifndef PRODUCT
4197 } else {
4198 switch (ireg) {
4199 case Op_VecS:
4200 case Op_VecD:
4201 case Op_VecX:
4202 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4203 break;
4204 case Op_VecY:
4205 case Op_VecZ:
4206 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4207 break;
4208 default:
4209 ShouldNotReachHere();
4210 }
4211 #endif
4212 }
4213 }
4214
4215 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4216 int stack_offset, int reg, uint ireg, outputStream* st) {
4217 if (masm) {
4218 if (is_load) {
4219 switch (ireg) {
4220 case Op_VecS:
4221 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4222 break;
4223 case Op_VecD:
4224 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4225 break;
4226 case Op_VecX:
4227 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4228 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4229 } else {
4230 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4231 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4232 }
4233 break;
4234 case Op_VecY:
4235 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4236 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4237 } else {
4238 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4239 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4240 }
4241 break;
4242 case Op_VecZ:
4243 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4244 break;
4245 default:
4246 ShouldNotReachHere();
4247 }
4248 } else { // store
4249 switch (ireg) {
4250 case Op_VecS:
4251 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4252 break;
4253 case Op_VecD:
4254 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4255 break;
4256 case Op_VecX:
4257 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4258 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4259 }
4260 else {
4261 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4262 }
4263 break;
4264 case Op_VecY:
4265 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4266 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4267 }
4268 else {
4269 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4270 }
4271 break;
4272 case Op_VecZ:
4273 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4274 break;
4275 default:
4276 ShouldNotReachHere();
4277 }
4278 }
4279 #ifndef PRODUCT
4280 } else {
4281 if (is_load) {
4282 switch (ireg) {
4283 case Op_VecS:
4284 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4285 break;
4286 case Op_VecD:
4287 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4288 break;
4289 case Op_VecX:
4290 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4291 break;
4292 case Op_VecY:
4293 case Op_VecZ:
4294 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4295 break;
4296 default:
4297 ShouldNotReachHere();
4298 }
4299 } else { // store
4300 switch (ireg) {
4301 case Op_VecS:
4302 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4303 break;
4304 case Op_VecD:
4305 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4306 break;
4307 case Op_VecX:
4308 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4309 break;
4310 case Op_VecY:
4311 case Op_VecZ:
4312 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4313 break;
4314 default:
4315 ShouldNotReachHere();
4316 }
4317 }
4318 #endif
4319 }
4320 }
4321
4322 template <class T>
4323 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4324 int size = type2aelembytes(bt) * len;
4325 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4326 for (int i = 0; i < len; i++) {
4327 int offset = i * type2aelembytes(bt);
4328 switch (bt) {
4329 case T_BYTE: val->at(i) = con; break;
4330 case T_SHORT: {
4331 jshort c = con;
4332 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4333 break;
4334 }
4335 case T_INT: {
4336 jint c = con;
4337 memcpy(val->adr_at(offset), &c, sizeof(jint));
4338 break;
4339 }
4340 case T_LONG: {
4341 jlong c = con;
4342 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4343 break;
4344 }
4345 case T_FLOAT: {
4346 jfloat c = con;
4347 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4348 break;
4349 }
4350 case T_DOUBLE: {
4351 jdouble c = con;
4352 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4353 break;
4354 }
4355 default: assert(false, "%s", type2name(bt));
4356 }
4357 }
4358 return val;
4359 }
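
// Worked example: vreplicate_imm(T_SHORT, (jshort)0x1234, 4) returns the
// 8-byte little-endian sequence 34 12 34 12 34 12 34 12, i.e. the constant
// broadcast across all four 16-bit lanes.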
4360
4361 static inline jlong high_bit_set(BasicType bt) {
4362 switch (bt) {
4363 case T_BYTE: return 0x8080808080808080;
4364 case T_SHORT: return 0x8000800080008000;
4365 case T_INT: return 0x8000000080000000;
4366 case T_LONG: return 0x8000000000000000;
4367 default:
4368 ShouldNotReachHere();
4369 return 0;
4370 }
4371 }
4372
4373 #ifndef PRODUCT
4374 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4375 st->print("nop \t# %d bytes pad for loops and calls", _count);
4376 }
4377 #endif
4378
4379 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4380 __ nop(_count);
4381 }
4382
4383 uint MachNopNode::size(PhaseRegAlloc*) const {
4384 return _count;
4385 }
4386
4387 #ifndef PRODUCT
4388 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4389 st->print("# breakpoint");
4390 }
4391 #endif
4392
4393 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4394 __ int3();
4395 }
4396
4397 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4398 return MachNode::size(ra_);
4399 }
4400
4401 %}
4402
4403 //----------ENCODING BLOCK-----------------------------------------------------
4404 // This block specifies the encoding classes used by the compiler to
4405 // output byte streams. Encoding classes are parameterized macros
4406 // used by Machine Instruction Nodes in order to generate the bit
4407 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4411 // which returns its register number when queried. CONST_INTER causes
4412 // an operand to generate a function which returns the value of the
4413 // constant when queried. MEMORY_INTER causes an operand to generate
4414 // four functions which return the Base Register, the Index Register,
4415 // the Scale Value, and the Offset Value of the operand when queried.
4416 // COND_INTER causes an operand to generate six functions which return
4417 // the encoding code (ie - encoding bits for the instruction)
4418 // associated with each basic boolean condition for a conditional
4419 // instruction.
4420 //
// Instructions specify two basic values for encoding. A function is
// also available to check whether the constant displacement is an
// oop. They use the ins_encode keyword to specify their encoding
4424 // classes (which must be a sequence of enc_class names, and their
4425 // parameters, specified in the encoding block), and they use the
4426 // opcode keyword to specify, in order, their primary, secondary, and
4427 // tertiary opcode. Only the opcode sections which a particular
4428 // instruction needs for encoding need to be specified.
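//
// Illustrative sketch only (the rule and enc_class names below are
// hypothetical, not definitions from this file):
//
//   instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x81, 0x0);            // primary, secondary (only as needed)
//     ins_encode(my_enc(dst, src)); // sequence of enc_class invocations
//     ins_pipe(ialu_reg);
//   %}
//
// where my_enc is an enc_class defined in the block below; its body may
// reference $primary/$secondary from the opcode declaration.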
4429 encode %{
4430 enc_class cdql_enc(no_rax_rdx_RegI div)
4431 %{
4432 // Full implementation of Java idiv and irem; checks for
4433 // special case as described in JVM spec., p.243 & p.271.
4434 //
4435 // normal case special case
4436 //
4437 // input : rax: dividend min_int
4438 // reg: divisor -1
4439 //
4440 // output: rax: quotient (= rax idiv reg) min_int
4441 // rdx: remainder (= rax irem reg) 0
4442 //
// Code sequence:
4444 //
4445 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4446 // 5: 75 07/08 jne e <normal>
4447 // 7: 33 d2 xor %edx,%edx
4448 // [div >= 8 -> offset + 1]
4449 // [REX_B]
4450 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4451 // c: 74 03/04 je 11 <done>
4452 // 000000000000000e <normal>:
4453 // e: 99 cltd
4454 // [div >= 8 -> offset + 1]
4455 // [REX_B]
4456 // f: f7 f9 idiv $div
4457 // 0000000000000011 <done>:
4458 Label normal;
4459 Label done;
4460
4461 // cmp $0x80000000,%eax
4462 __ cmpl(as_Register(RAX_enc), 0x80000000);
4463
4464 // jne e <normal>
4465 __ jccb(Assembler::notEqual, normal);
4466
4467 // xor %edx,%edx
4468 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4469
// cmp $0xffffffffffffffff,$div
4471 __ cmpl($div$$Register, -1);
4472
4473 // je 11 <done>
4474 __ jccb(Assembler::equal, done);
4475
4476 // <normal>
4477 // cltd
4478 __ bind(normal);
4479 __ cdql();
4480
4481 // idivl
4482 // <done>
4483 __ idivl($div$$Register);
4484 __ bind(done);
4485 %}
4486
4487 enc_class cdqq_enc(no_rax_rdx_RegL div)
4488 %{
4489 // Full implementation of Java ldiv and lrem; checks for
4490 // special case as described in JVM spec., p.243 & p.271.
4491 //
4492 // normal case special case
4493 //
4494 // input : rax: dividend min_long
4495 // reg: divisor -1
4496 //
4497 // output: rax: quotient (= rax idiv reg) min_long
4498 // rdx: remainder (= rax irem reg) 0
4499 //
// Code sequence:
4501 //
4502 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4503 // 7: 00 00 80
4504 // a: 48 39 d0 cmp %rdx,%rax
4505 // d: 75 08 jne 17 <normal>
4506 // f: 33 d2 xor %edx,%edx
4507 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4508 // 15: 74 05 je 1c <done>
4509 // 0000000000000017 <normal>:
4510 // 17: 48 99 cqto
4511 // 19: 48 f7 f9 idiv $div
4512 // 000000000000001c <done>:
4513 Label normal;
4514 Label done;
4515
4516 // mov $0x8000000000000000,%rdx
4517 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4518
4519 // cmp %rdx,%rax
4520 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4521
4522 // jne 17 <normal>
4523 __ jccb(Assembler::notEqual, normal);
4524
4525 // xor %edx,%edx
4526 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4527
4528 // cmp $0xffffffffffffffff,$div
4529 __ cmpq($div$$Register, -1);
4530
// je 1c <done>
4532 __ jccb(Assembler::equal, done);
4533
4534 // <normal>
4535 // cqto
4536 __ bind(normal);
4537 __ cdqq();
4538
// idivq
4540 // <done>
4541 __ idivq($div$$Register);
4542 __ bind(done);
4543 %}
4544
4545 enc_class clear_avx %{
4546 DEBUG_ONLY(int off0 = __ offset());
4547 if (generate_vzeroupper(Compile::current())) {
// Clear the upper bits of YMM registers when the current compiled code
// uses wide vectors, to avoid the AVX <-> SSE transition penalty during
// the call.
4551 __ vzeroupper();
4552 }
4553 DEBUG_ONLY(int off1 = __ offset());
4554 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4555 %}
4556
4557 enc_class Java_To_Runtime(method meth) %{
4558 __ lea(r10, RuntimeAddress((address)$meth$$method));
4559 __ call(r10);
4560 __ post_call_nop();
4561 %}
4562
4563 enc_class Java_Static_Call(method meth)
4564 %{
4565 // JAVA STATIC CALL
4566 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4567 // determine who we intended to call.
4568 if (!_method) {
4569 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4570 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4571 // The NOP here is purely to ensure that eliding a call to
4572 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4573 __ addr_nop_5();
4574 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4575 } else {
4576 int method_index = resolved_method_index(masm);
4577 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4578 : static_call_Relocation::spec(method_index);
4579 address mark = __ pc();
4580 int call_offset = __ offset();
4581 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4582 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4583 // Calls of the same statically bound method can share
4584 // a stub to the interpreter.
4585 __ code()->shared_stub_to_interp_for(_method, call_offset);
4586 } else {
4587 // Emit stubs for static call.
4588 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4589 __ clear_inst_mark();
4590 if (stub == nullptr) {
4591 ciEnv::current()->record_failure("CodeCache is full");
4592 return;
4593 }
4594 }
4595 }
4596 __ post_call_nop();
4597 %}
4598
4599 enc_class Java_Dynamic_Call(method meth) %{
4600 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4601 __ post_call_nop();
4602 %}
4603
4604 enc_class call_epilog %{
4605 if (VerifyStackAtCalls) {
4606 // Check that stack depth is unchanged: find majik cookie on stack
4607 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4608 Label L;
4609 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4610 __ jccb(Assembler::equal, L);
4611 // Die if stack mismatch
4612 __ int3();
4613 __ bind(L);
4614 }
4615 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4616 // The last return value is not set by the callee but used to pass the null marker to compiled code.
// Search for the corresponding projection, get the register, and emit code that initializes it.
4618 uint con = (tf()->range_cc()->cnt() - 1);
4619 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4620 ProjNode* proj = fast_out(i)->as_Proj();
4621 if (proj->_con == con) {
4622 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4623 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4624 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4625 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4626 __ testq(rax, rax);
4627 __ setb(Assembler::notZero, toReg);
4628 __ movzbl(toReg, toReg);
4629 if (reg->is_stack()) {
4630 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4631 __ movq(Address(rsp, st_off), toReg);
4632 }
4633 break;
4634 }
4635 }
4636 if (return_value_is_used()) {
4637 // An inline type is returned as fields in multiple registers.
4638 // Rax either contains an oop if the inline type is buffered or a pointer
4639 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4640 // if the lowest bit is set to allow C2 to use the oop after null checking.
4641 // rax &= (rax & 1) - 1
4642 __ movptr(rscratch1, rax);
4643 __ andptr(rscratch1, 0x1);
4644 __ subptr(rscratch1, 0x1);
4645 __ andptr(rax, rscratch1);
4646 }
4647 }
4648 %}
4649
4650 %}
4651
4652 //----------FRAME--------------------------------------------------------------
4653 // Definition of frame structure and management information.
4654 //
4655 // S T A C K L A Y O U T Allocators stack-slot number
4656 // | (to get allocators register number
4657 // G Owned by | | v add OptoReg::stack0())
4658 // r CALLER | |
4659 // o | +--------+ pad to even-align allocators stack-slot
4660 // w V | pad0 | numbers; owned by CALLER
4661 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4662 // h ^ | in | 5
4663 // | | args | 4 Holes in incoming args owned by SELF
4664 // | | | | 3
4665 // | | +--------+
4666 // V | | old out| Empty on Intel, window on Sparc
4667 // | old |preserve| Must be even aligned.
4668 // | SP-+--------+----> Matcher::_old_SP, even aligned
4669 // | | in | 3 area for Intel ret address
4670 // Owned by |preserve| Empty on Sparc.
4671 // SELF +--------+
4672 // | | pad2 | 2 pad to align old SP
4673 // | +--------+ 1
4674 // | | locks | 0
4675 // | +--------+----> OptoReg::stack0(), even aligned
4676 // | | pad1 | 11 pad to align new SP
4677 // | +--------+
4678 // | | | 10
4679 // | | spills | 9 spills
4680 // V | | 8 (pad0 slot for callee)
4681 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4682 // ^ | out | 7
4683 // | | args | 6 Holes in outgoing args owned by CALLEE
4684 // Owned by +--------+
4685 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4686 // | new |preserve| Must be even-aligned.
4687 // | SP-+--------+----> Matcher::_new_SP, even aligned
4688 // | | |
4689 //
4690 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4691 // known from SELF's arguments and the Java calling convention.
4692 // Region 6-7 is determined per call site.
4693 // Note 2: If the calling convention leaves holes in the incoming argument
4694 // area, those holes are owned by SELF. Holes in the outgoing area
4695 // are owned by the CALLEE. Holes should not be necessary in the
4696 // incoming area, as the Java calling convention is completely under
4697 // the control of the AD file. Doubles can be sorted and packed to
4698 // avoid holes. Holes in the outgoing arguments may be necessary for
4699 // varargs C calling conventions.
4700 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4701 // even aligned with pad0 as needed.
4702 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4703 // region 6-11 is even aligned; it may be padded out more so that
4704 // the region from SP to FP meets the minimum stack alignment.
4705 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4706 // alignment. Region 11, pad1, may be dynamically extended so that
4707 // SP meets the minimum alignment.
4708
4709 frame
4710 %{
4711 // These three registers define part of the calling convention
4712 // between compiled code and the interpreter.
4713 inline_cache_reg(RAX); // Inline Cache Register
4714
4715 // Optional: name the operand used by cisc-spilling to access
4716 // [stack_pointer + offset]
4717 cisc_spilling_operand_name(indOffset32);
4718
4719 // Number of stack slots consumed by locking an object
4720 sync_stack_slots(2);
4721
4722 // Compiled code's Frame Pointer
4723 frame_pointer(RSP);
4724
4725 // Interpreter stores its frame pointer in a register which is
4726 // stored to the stack by I2CAdaptors.
// I2CAdaptors convert from interpreted Java to compiled Java.
4728 interpreter_frame_pointer(RBP);
4729
4730 // Stack alignment requirement
4731 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4732
4733 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4734 // for calls to C. Supports the var-args backing area for register parms.
4735 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4736
4737 // The after-PROLOG location of the return address. Location of
4738 // return address specifies a type (REG or STACK) and a number
4739 // representing the register number (i.e. - use a register name) or
4740 // stack slot.
4741 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
// Otherwise, it is above the locks and verification slot and alignment word.
4743 return_addr(STACK - 2 +
4744 align_up((Compile::current()->in_preserve_stack_slots() +
4745 Compile::current()->fixed_slots()),
4746 stack_alignment_in_slots()));
4747
4748 // Location of compiled Java return values. Same as C for now.
4749 return_value
4750 %{
4751 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4752 "only return normal values");
4753
4754 static const int lo[Op_RegL + 1] = {
4755 0,
4756 0,
4757 RAX_num, // Op_RegN
4758 RAX_num, // Op_RegI
4759 RAX_num, // Op_RegP
4760 XMM0_num, // Op_RegF
4761 XMM0_num, // Op_RegD
4762 RAX_num // Op_RegL
4763 };
4764 static const int hi[Op_RegL + 1] = {
4765 0,
4766 0,
4767 OptoReg::Bad, // Op_RegN
4768 OptoReg::Bad, // Op_RegI
4769 RAX_H_num, // Op_RegP
4770 OptoReg::Bad, // Op_RegF
4771 XMM0b_num, // Op_RegD
4772 RAX_H_num // Op_RegL
4773 };
4774 // Excluded flags and vector registers.
4775 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4776 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4777 %}
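// For example, for Op_RegL the tables above yield OptoRegPair(RAX_H_num, RAX_num):
// a Java long is returned in RAX, while a double (Op_RegD) is returned in
// XMM0 (XMM0b_num:XMM0_num).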
4778 %}
4779
4780 //----------ATTRIBUTES---------------------------------------------------------
4781 //----------Operand Attributes-------------------------------------------------
4782 op_attrib op_cost(0); // Required cost attribute
4783
4784 //----------Instruction Attributes---------------------------------------------
4785 ins_attrib ins_cost(100); // Required cost attribute
4786 ins_attrib ins_size(8); // Required size attribute (in bits)
4787 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4788 // a non-matching short branch variant
4789 // of some long branch?
4790 ins_attrib ins_alignment(1); // Required alignment attribute (must
4791 // be a power of 2) specifies the
4792 // alignment that some part of the
4793 // instruction (not necessarily the
4794 // start) requires. If > 1, a
4795 // compute_padding() function must be
4796 // provided for the instruction
4797
4798 // Whether this node is expanded during code emission into a sequence of
4799 // instructions and the first instruction can perform an implicit null check.
4800 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4801
4802 //----------OPERANDS-----------------------------------------------------------
4803 // Operand definitions must precede instruction definitions for correct parsing
4804 // in the ADLC because operands constitute user defined types which are used in
4805 // instruction definitions.
4806
4807 //----------Simple Operands----------------------------------------------------
4808 // Immediate Operands
4809 // Integer Immediate
4810 operand immI()
4811 %{
4812 match(ConI);
4813
4814 op_cost(10);
4815 format %{ %}
4816 interface(CONST_INTER);
4817 %}
4818
4819 // Constant for test vs zero
4820 operand immI_0()
4821 %{
4822 predicate(n->get_int() == 0);
4823 match(ConI);
4824
4825 op_cost(0);
4826 format %{ %}
4827 interface(CONST_INTER);
4828 %}
4829
4830 // Constant for increment
4831 operand immI_1()
4832 %{
4833 predicate(n->get_int() == 1);
4834 match(ConI);
4835
4836 op_cost(0);
4837 format %{ %}
4838 interface(CONST_INTER);
4839 %}
4840
4841 // Constant for decrement
4842 operand immI_M1()
4843 %{
4844 predicate(n->get_int() == -1);
4845 match(ConI);
4846
4847 op_cost(0);
4848 format %{ %}
4849 interface(CONST_INTER);
4850 %}
4851
4852 operand immI_2()
4853 %{
4854 predicate(n->get_int() == 2);
4855 match(ConI);
4856
4857 op_cost(0);
4858 format %{ %}
4859 interface(CONST_INTER);
4860 %}
4861
4862 operand immI_4()
4863 %{
4864 predicate(n->get_int() == 4);
4865 match(ConI);
4866
4867 op_cost(0);
4868 format %{ %}
4869 interface(CONST_INTER);
4870 %}
4871
4872 operand immI_8()
4873 %{
4874 predicate(n->get_int() == 8);
4875 match(ConI);
4876
4877 op_cost(0);
4878 format %{ %}
4879 interface(CONST_INTER);
4880 %}
4881
4882 // Valid scale values for addressing modes
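// (0..3 encode index scale factors *1, *2, *4, *8)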
4883 operand immI2()
4884 %{
4885 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4886 match(ConI);
4887
4888 format %{ %}
4889 interface(CONST_INTER);
4890 %}
4891
4892 operand immU7()
4893 %{
4894 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4895 match(ConI);
4896
4897 op_cost(5);
4898 format %{ %}
4899 interface(CONST_INTER);
4900 %}
4901
4902 operand immI8()
4903 %{
4904 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4905 match(ConI);
4906
4907 op_cost(5);
4908 format %{ %}
4909 interface(CONST_INTER);
4910 %}
4911
4912 operand immU8()
4913 %{
4914 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4915 match(ConI);
4916
4917 op_cost(5);
4918 format %{ %}
4919 interface(CONST_INTER);
4920 %}
4921
4922 operand immI16()
4923 %{
4924 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4925 match(ConI);
4926
4927 op_cost(10);
4928 format %{ %}
4929 interface(CONST_INTER);
4930 %}
4931
4932 // Int Immediate non-negative
4933 operand immU31()
4934 %{
4935 predicate(n->get_int() >= 0);
4936 match(ConI);
4937
4938 op_cost(0);
4939 format %{ %}
4940 interface(CONST_INTER);
4941 %}
4942
4943 // Pointer Immediate
4944 operand immP()
4945 %{
4946 match(ConP);
4947
4948 op_cost(10);
4949 format %{ %}
4950 interface(CONST_INTER);
4951 %}
4952
4953 // Null Pointer Immediate
4954 operand immP0()
4955 %{
4956 predicate(n->get_ptr() == 0);
4957 match(ConP);
4958
4959 op_cost(5);
4960 format %{ %}
4961 interface(CONST_INTER);
4962 %}
4963
4964 // Pointer Immediate
4965 operand immN() %{
4966 match(ConN);
4967
4968 op_cost(10);
4969 format %{ %}
4970 interface(CONST_INTER);
4971 %}
4972
4973 operand immNKlass() %{
4974 match(ConNKlass);
4975
4976 op_cost(10);
4977 format %{ %}
4978 interface(CONST_INTER);
4979 %}
4980
4981 // Null Pointer Immediate
4982 operand immN0() %{
4983 predicate(n->get_narrowcon() == 0);
4984 match(ConN);
4985
4986 op_cost(5);
4987 format %{ %}
4988 interface(CONST_INTER);
4989 %}
4990
4991 operand immP31()
4992 %{
4993 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4994 && (n->get_ptr() >> 31) == 0);
4995 match(ConP);
4996
4997 op_cost(5);
4998 format %{ %}
4999 interface(CONST_INTER);
5000 %}
5001
5002
5003 // Long Immediate
5004 operand immL()
5005 %{
5006 match(ConL);
5007
5008 op_cost(20);
5009 format %{ %}
5010 interface(CONST_INTER);
5011 %}
5012
5013 // Long Immediate 8-bit
5014 operand immL8()
5015 %{
5016 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
5017 match(ConL);
5018
5019 op_cost(5);
5020 format %{ %}
5021 interface(CONST_INTER);
5022 %}
5023
5024 // Long Immediate 32-bit unsigned
5025 operand immUL32()
5026 %{
5027 predicate(n->get_long() == (unsigned int) (n->get_long()));
5028 match(ConL);
5029
5030 op_cost(10);
5031 format %{ %}
5032 interface(CONST_INTER);
5033 %}
5034
5035 // Long Immediate 32-bit signed
5036 operand immL32()
5037 %{
5038 predicate(n->get_long() == (int) (n->get_long()));
5039 match(ConL);
5040
5041 op_cost(15);
5042 format %{ %}
5043 interface(CONST_INTER);
5044 %}
5045
5046 operand immL_Pow2()
5047 %{
5048 predicate(is_power_of_2((julong)n->get_long()));
5049 match(ConL);
5050
5051 op_cost(15);
5052 format %{ %}
5053 interface(CONST_INTER);
5054 %}
5055
5056 operand immL_NotPow2()
5057 %{
5058 predicate(is_power_of_2((julong)~n->get_long()));
5059 match(ConL);
5060
5061 op_cost(15);
5062 format %{ %}
5063 interface(CONST_INTER);
5064 %}
5065
5066 // Long Immediate zero
5067 operand immL0()
5068 %{
5069 predicate(n->get_long() == 0L);
5070 match(ConL);
5071
5072 op_cost(10);
5073 format %{ %}
5074 interface(CONST_INTER);
5075 %}
5076
5077 // Constant for increment
5078 operand immL1()
5079 %{
5080 predicate(n->get_long() == 1);
5081 match(ConL);
5082
5083 format %{ %}
5084 interface(CONST_INTER);
5085 %}
5086
5087 // Constant for decrement
5088 operand immL_M1()
5089 %{
5090 predicate(n->get_long() == -1);
5091 match(ConL);
5092
5093 format %{ %}
5094 interface(CONST_INTER);
5095 %}
5096
5097 // Long Immediate: low 32-bit mask
5098 operand immL_32bits()
5099 %{
5100 predicate(n->get_long() == 0xFFFFFFFFL);
5101 match(ConL);
5102 op_cost(20);
5103
5104 format %{ %}
5105 interface(CONST_INTER);
5106 %}
5107
5108 // Int Immediate: 2^n-1, positive
5109 operand immI_Pow2M1()
5110 %{
5111 predicate((n->get_int() > 0)
5112 && is_power_of_2((juint)n->get_int() + 1));
5113 match(ConI);
5114
5115 op_cost(20);
5116 format %{ %}
5117 interface(CONST_INTER);
5118 %}
5119
5120 // Float Immediate zero
5121 operand immF0()
5122 %{
5123 predicate(jint_cast(n->getf()) == 0);
5124 match(ConF);
5125
5126 op_cost(5);
5127 format %{ %}
5128 interface(CONST_INTER);
5129 %}
5130
5131 // Float Immediate
5132 operand immF()
5133 %{
5134 match(ConF);
5135
5136 op_cost(15);
5137 format %{ %}
5138 interface(CONST_INTER);
5139 %}
5140
5141 // Half Float Immediate
5142 operand immH()
5143 %{
5144 match(ConH);
5145
5146 op_cost(15);
5147 format %{ %}
5148 interface(CONST_INTER);
5149 %}
5150
5151 // Double Immediate zero
5152 operand immD0()
5153 %{
5154 predicate(jlong_cast(n->getd()) == 0);
5155 match(ConD);
5156
5157 op_cost(5);
5158 format %{ %}
5159 interface(CONST_INTER);
5160 %}
5161
5162 // Double Immediate
5163 operand immD()
5164 %{
5165 match(ConD);
5166
5167 op_cost(15);
5168 format %{ %}
5169 interface(CONST_INTER);
5170 %}
5171
5172 // Immediates for special shifts (sign extend)
5173
5174 // Constants for increment
5175 operand immI_16()
5176 %{
5177 predicate(n->get_int() == 16);
5178 match(ConI);
5179
5180 format %{ %}
5181 interface(CONST_INTER);
5182 %}
5183
5184 operand immI_24()
5185 %{
5186 predicate(n->get_int() == 24);
5187 match(ConI);
5188
5189 format %{ %}
5190 interface(CONST_INTER);
5191 %}
5192
5193 // Constant for byte-wide masking
5194 operand immI_255()
5195 %{
5196 predicate(n->get_int() == 255);
5197 match(ConI);
5198
5199 format %{ %}
5200 interface(CONST_INTER);
5201 %}
5202
5203 // Constant for short-wide masking
5204 operand immI_65535()
5205 %{
5206 predicate(n->get_int() == 65535);
5207 match(ConI);
5208
5209 format %{ %}
5210 interface(CONST_INTER);
5211 %}
5212
5213 // Constant for byte-wide masking
5214 operand immL_255()
5215 %{
5216 predicate(n->get_long() == 255);
5217 match(ConL);
5218
5219 format %{ %}
5220 interface(CONST_INTER);
5221 %}
5222
5223 // Constant for short-wide masking
5224 operand immL_65535()
5225 %{
5226 predicate(n->get_long() == 65535);
5227 match(ConL);
5228
5229 format %{ %}
5230 interface(CONST_INTER);
5231 %}
5232
5233 operand kReg()
5234 %{
5235 constraint(ALLOC_IN_RC(vectmask_reg));
5236 match(RegVectMask);
5237 format %{%}
5238 interface(REG_INTER);
5239 %}
5240
5241 // Register Operands
5242 // Integer Register
5243 operand rRegI()
5244 %{
5245 constraint(ALLOC_IN_RC(int_reg));
5246 match(RegI);
5247
5248 match(rax_RegI);
5249 match(rbx_RegI);
5250 match(rcx_RegI);
5251 match(rdx_RegI);
5252 match(rdi_RegI);
5253
5254 format %{ %}
5255 interface(REG_INTER);
5256 %}
5257
5258 // Special Registers
5259 operand rax_RegI()
5260 %{
5261 constraint(ALLOC_IN_RC(int_rax_reg));
5262 match(RegI);
5263 match(rRegI);
5264
5265 format %{ "RAX" %}
5266 interface(REG_INTER);
5267 %}
5268
5269 // Special Registers
5270 operand rbx_RegI()
5271 %{
5272 constraint(ALLOC_IN_RC(int_rbx_reg));
5273 match(RegI);
5274 match(rRegI);
5275
5276 format %{ "RBX" %}
5277 interface(REG_INTER);
5278 %}
5279
5280 operand rcx_RegI()
5281 %{
5282 constraint(ALLOC_IN_RC(int_rcx_reg));
5283 match(RegI);
5284 match(rRegI);
5285
5286 format %{ "RCX" %}
5287 interface(REG_INTER);
5288 %}
5289
5290 operand rdx_RegI()
5291 %{
5292 constraint(ALLOC_IN_RC(int_rdx_reg));
5293 match(RegI);
5294 match(rRegI);
5295
5296 format %{ "RDX" %}
5297 interface(REG_INTER);
5298 %}
5299
5300 operand rdi_RegI()
5301 %{
5302 constraint(ALLOC_IN_RC(int_rdi_reg));
5303 match(RegI);
5304 match(rRegI);
5305
5306 format %{ "RDI" %}
5307 interface(REG_INTER);
5308 %}
5309
5310 operand no_rax_rdx_RegI()
5311 %{
5312 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5313 match(RegI);
5314 match(rbx_RegI);
5315 match(rcx_RegI);
5316 match(rdi_RegI);
5317
5318 format %{ %}
5319 interface(REG_INTER);
5320 %}
5321
5322 operand no_rbp_r13_RegI()
5323 %{
5324 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5325 match(RegI);
5326 match(rRegI);
5327 match(rax_RegI);
5328 match(rbx_RegI);
5329 match(rcx_RegI);
5330 match(rdx_RegI);
5331 match(rdi_RegI);
5332
5333 format %{ %}
5334 interface(REG_INTER);
5335 %}
5336
5337 // Pointer Register
5338 operand any_RegP()
5339 %{
5340 constraint(ALLOC_IN_RC(any_reg));
5341 match(RegP);
5342 match(rax_RegP);
5343 match(rbx_RegP);
5344 match(rdi_RegP);
5345 match(rsi_RegP);
5346 match(rbp_RegP);
5347 match(r15_RegP);
5348 match(rRegP);
5349
5350 format %{ %}
5351 interface(REG_INTER);
5352 %}
5353
5354 operand rRegP()
5355 %{
5356 constraint(ALLOC_IN_RC(ptr_reg));
5357 match(RegP);
5358 match(rax_RegP);
5359 match(rbx_RegP);
5360 match(rdi_RegP);
5361 match(rsi_RegP);
5362 match(rbp_RegP); // See Q&A below about
5363 match(r15_RegP); // r15_RegP and rbp_RegP.
5364
5365 format %{ %}
5366 interface(REG_INTER);
5367 %}
5368
5369 operand rRegN() %{
5370 constraint(ALLOC_IN_RC(int_reg));
5371 match(RegN);
5372
5373 format %{ %}
5374 interface(REG_INTER);
5375 %}
5376
5377 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5378 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
5380 // The output of an instruction is controlled by the allocator, which respects
5381 // register class masks, not match rules. Unless an instruction mentions
5382 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5383 // by the allocator as an input.
5384 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5385 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5386 // result, RBP is not included in the output of the instruction either.
5387
5388 // This operand is not allowed to use RBP even if
5389 // RBP is not used to hold the frame pointer.
5390 operand no_rbp_RegP()
5391 %{
5392 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5393 match(RegP);
5394 match(rbx_RegP);
5395 match(rsi_RegP);
5396 match(rdi_RegP);
5397
5398 format %{ %}
5399 interface(REG_INTER);
5400 %}
5401
5402 // Special Registers
5403 // Return a pointer value
5404 operand rax_RegP()
5405 %{
5406 constraint(ALLOC_IN_RC(ptr_rax_reg));
5407 match(RegP);
5408 match(rRegP);
5409
5410 format %{ %}
5411 interface(REG_INTER);
5412 %}
5413
5414 // Special Registers
5415 // Return a compressed pointer value
5416 operand rax_RegN()
5417 %{
5418 constraint(ALLOC_IN_RC(int_rax_reg));
5419 match(RegN);
5420 match(rRegN);
5421
5422 format %{ %}
5423 interface(REG_INTER);
5424 %}
5425
5426 // Used in AtomicAdd
5427 operand rbx_RegP()
5428 %{
5429 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5430 match(RegP);
5431 match(rRegP);
5432
5433 format %{ %}
5434 interface(REG_INTER);
5435 %}
5436
5437 operand rsi_RegP()
5438 %{
5439 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5440 match(RegP);
5441 match(rRegP);
5442
5443 format %{ %}
5444 interface(REG_INTER);
5445 %}
5446
5447 operand rbp_RegP()
5448 %{
5449 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5450 match(RegP);
5451 match(rRegP);
5452
5453 format %{ %}
5454 interface(REG_INTER);
5455 %}
5456
5457 // Used in rep stosq
5458 operand rdi_RegP()
5459 %{
5460 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5461 match(RegP);
5462 match(rRegP);
5463
5464 format %{ %}
5465 interface(REG_INTER);
5466 %}
5467
5468 operand r15_RegP()
5469 %{
5470 constraint(ALLOC_IN_RC(ptr_r15_reg));
5471 match(RegP);
5472 match(rRegP);
5473
5474 format %{ %}
5475 interface(REG_INTER);
5476 %}
5477
5478 operand rRegL()
5479 %{
5480 constraint(ALLOC_IN_RC(long_reg));
5481 match(RegL);
5482 match(rax_RegL);
5483 match(rdx_RegL);
5484
5485 format %{ %}
5486 interface(REG_INTER);
5487 %}
5488
5489 // Special Registers
5490 operand no_rax_rdx_RegL()
5491 %{
5492 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5493 match(RegL);
5494 match(rRegL);
5495
5496 format %{ %}
5497 interface(REG_INTER);
5498 %}
5499
5500 operand rax_RegL()
5501 %{
5502 constraint(ALLOC_IN_RC(long_rax_reg));
5503 match(RegL);
5504 match(rRegL);
5505
5506 format %{ "RAX" %}
5507 interface(REG_INTER);
5508 %}
5509
5510 operand rcx_RegL()
5511 %{
5512 constraint(ALLOC_IN_RC(long_rcx_reg));
5513 match(RegL);
5514 match(rRegL);
5515
5516 format %{ %}
5517 interface(REG_INTER);
5518 %}
5519
5520 operand rdx_RegL()
5521 %{
5522 constraint(ALLOC_IN_RC(long_rdx_reg));
5523 match(RegL);
5524 match(rRegL);
5525
5526 format %{ %}
5527 interface(REG_INTER);
5528 %}
5529
5530 operand r11_RegL()
5531 %{
5532 constraint(ALLOC_IN_RC(long_r11_reg));
5533 match(RegL);
5534 match(rRegL);
5535
5536 format %{ %}
5537 interface(REG_INTER);
5538 %}
5539
5540 operand no_rbp_r13_RegL()
5541 %{
5542 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5543 match(RegL);
5544 match(rRegL);
5545 match(rax_RegL);
5546 match(rcx_RegL);
5547 match(rdx_RegL);
5548
5549 format %{ %}
5550 interface(REG_INTER);
5551 %}
5552
5553 // Flags register, used as output of compare instructions
5554 operand rFlagsReg()
5555 %{
5556 constraint(ALLOC_IN_RC(int_flags));
5557 match(RegFlags);
5558
5559 format %{ "RFLAGS" %}
5560 interface(REG_INTER);
5561 %}
5562
5563 // Flags register, used as output of FLOATING POINT compare instructions
5564 operand rFlagsRegU()
5565 %{
5566 constraint(ALLOC_IN_RC(int_flags));
5567 match(RegFlags);
5568
5569 format %{ "RFLAGS_U" %}
5570 interface(REG_INTER);
5571 %}
5572
5573 operand rFlagsRegUCF() %{
5574 constraint(ALLOC_IN_RC(int_flags));
5575 match(RegFlags);
5576 predicate(false);
5577
5578 format %{ "RFLAGS_U_CF" %}
5579 interface(REG_INTER);
5580 %}
5581
5582 // Float register operands
5583 operand regF() %{
5584 constraint(ALLOC_IN_RC(float_reg));
5585 match(RegF);
5586
5587 format %{ %}
5588 interface(REG_INTER);
5589 %}
5590
5591 // Float register operands
5592 operand legRegF() %{
5593 constraint(ALLOC_IN_RC(float_reg_legacy));
5594 match(RegF);
5595
5596 format %{ %}
5597 interface(REG_INTER);
5598 %}
5599
5600 // Float register operands
5601 operand vlRegF() %{
5602 constraint(ALLOC_IN_RC(float_reg_vl));
5603 match(RegF);
5604
5605 format %{ %}
5606 interface(REG_INTER);
5607 %}
5608
5609 // Double register operands
5610 operand regD() %{
5611 constraint(ALLOC_IN_RC(double_reg));
5612 match(RegD);
5613
5614 format %{ %}
5615 interface(REG_INTER);
5616 %}
5617
5618 // Double register operands
5619 operand legRegD() %{
5620 constraint(ALLOC_IN_RC(double_reg_legacy));
5621 match(RegD);
5622
5623 format %{ %}
5624 interface(REG_INTER);
5625 %}
5626
5627 // Double register operands
5628 operand vlRegD() %{
5629 constraint(ALLOC_IN_RC(double_reg_vl));
5630 match(RegD);
5631
5632 format %{ %}
5633 interface(REG_INTER);
5634 %}
5635
5636 //----------Memory Operands----------------------------------------------------
5637 // Direct Memory Operand
5638 // operand direct(immP addr)
5639 // %{
5640 // match(addr);
5641
5642 // format %{ "[$addr]" %}
5643 // interface(MEMORY_INTER) %{
5644 // base(0xFFFFFFFF);
5645 // index(0x4);
5646 // scale(0x0);
5647 // disp($addr);
5648 // %}
5649 // %}
5650
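// Note: in the MEMORY_INTER encodings below, index(0x4) means "no index
// register": encoding 4 is RSP, which the x86 SIB byte cannot use as an
// index, so it serves as the no-index sentinel.
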
5651 // Indirect Memory Operand
5652 operand indirect(any_RegP reg)
5653 %{
5654 constraint(ALLOC_IN_RC(ptr_reg));
5655 match(reg);
5656
5657 format %{ "[$reg]" %}
5658 interface(MEMORY_INTER) %{
5659 base($reg);
5660 index(0x4);
5661 scale(0x0);
5662 disp(0x0);
5663 %}
5664 %}
5665
5666 // Indirect Memory Plus Short Offset Operand
5667 operand indOffset8(any_RegP reg, immL8 off)
5668 %{
5669 constraint(ALLOC_IN_RC(ptr_reg));
5670 match(AddP reg off);
5671
5672 format %{ "[$reg + $off (8-bit)]" %}
5673 interface(MEMORY_INTER) %{
5674 base($reg);
5675 index(0x4);
5676 scale(0x0);
5677 disp($off);
5678 %}
5679 %}
5680
5681 // Indirect Memory Plus Long Offset Operand
5682 operand indOffset32(any_RegP reg, immL32 off)
5683 %{
5684 constraint(ALLOC_IN_RC(ptr_reg));
5685 match(AddP reg off);
5686
5687 format %{ "[$reg + $off (32-bit)]" %}
5688 interface(MEMORY_INTER) %{
5689 base($reg);
5690 index(0x4);
5691 scale(0x0);
5692 disp($off);
5693 %}
5694 %}
5695
5696 // Indirect Memory Plus Index Register Plus Offset Operand
5697 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5698 %{
5699 constraint(ALLOC_IN_RC(ptr_reg));
5700 match(AddP (AddP reg lreg) off);
5701
5702 op_cost(10);
5703 format %{"[$reg + $off + $lreg]" %}
5704 interface(MEMORY_INTER) %{
5705 base($reg);
5706 index($lreg);
5707 scale(0x0);
5708 disp($off);
5709 %}
5710 %}
5711
// Indirect Memory Plus Index Register Operand
5713 operand indIndex(any_RegP reg, rRegL lreg)
5714 %{
5715 constraint(ALLOC_IN_RC(ptr_reg));
5716 match(AddP reg lreg);
5717
5718 op_cost(10);
5719 format %{"[$reg + $lreg]" %}
5720 interface(MEMORY_INTER) %{
5721 base($reg);
5722 index($lreg);
5723 scale(0x0);
5724 disp(0x0);
5725 %}
5726 %}
5727
5728 // Indirect Memory Times Scale Plus Index Register
5729 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5730 %{
5731 constraint(ALLOC_IN_RC(ptr_reg));
5732 match(AddP reg (LShiftL lreg scale));
5733
5734 op_cost(10);
5735 format %{"[$reg + $lreg << $scale]" %}
5736 interface(MEMORY_INTER) %{
5737 base($reg);
5738 index($lreg);
5739 scale($scale);
5740 disp(0x0);
5741 %}
5742 %}
5743
5744 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5745 %{
5746 constraint(ALLOC_IN_RC(ptr_reg));
5747 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5748 match(AddP reg (LShiftL (ConvI2L idx) scale));
5749
5750 op_cost(10);
5751 format %{"[$reg + pos $idx << $scale]" %}
5752 interface(MEMORY_INTER) %{
5753 base($reg);
5754 index($idx);
5755 scale($scale);
5756 disp(0x0);
5757 %}
5758 %}
5759
5760 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5761 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5762 %{
5763 constraint(ALLOC_IN_RC(ptr_reg));
5764 match(AddP (AddP reg (LShiftL lreg scale)) off);
5765
5766 op_cost(10);
5767 format %{"[$reg + $off + $lreg << $scale]" %}
5768 interface(MEMORY_INTER) %{
5769 base($reg);
5770 index($lreg);
5771 scale($scale);
5772 disp($off);
5773 %}
5774 %}
5775
5776 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5777 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5778 %{
5779 constraint(ALLOC_IN_RC(ptr_reg));
5780 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5781 match(AddP (AddP reg (ConvI2L idx)) off);
5782
5783 op_cost(10);
5784 format %{"[$reg + $off + $idx]" %}
5785 interface(MEMORY_INTER) %{
5786 base($reg);
5787 index($idx);
5788 scale(0x0);
5789 disp($off);
5790 %}
5791 %}
5792
5793 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5794 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5795 %{
5796 constraint(ALLOC_IN_RC(ptr_reg));
5797 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5798 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5799
5800 op_cost(10);
5801 format %{"[$reg + $off + $idx << $scale]" %}
5802 interface(MEMORY_INTER) %{
5803 base($reg);
5804 index($idx);
5805 scale($scale);
5806 disp($off);
5807 %}
5808 %}
5809
5810 // Indirect Narrow Oop Operand
5811 operand indCompressedOop(rRegN reg) %{
5812 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5813 constraint(ALLOC_IN_RC(ptr_reg));
5814 match(DecodeN reg);
5815
5816 op_cost(10);
5817 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5818 interface(MEMORY_INTER) %{
5819 base(0xc); // R12
5820 index($reg);
5821 scale(0x3);
5822 disp(0x0);
5823 %}
5824 %}
5825
5826 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a base,
// so we can't free r12 even with CompressedOops::base() == nullptr.
5829 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5830 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5831 constraint(ALLOC_IN_RC(ptr_reg));
5832 match(AddP (DecodeN reg) off);
5833
5834 op_cost(10);
5835 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5836 interface(MEMORY_INTER) %{
5837 base(0xc); // R12
5838 index($reg);
5839 scale(0x3);
5840 disp($off);
5841 %}
5842 %}
5843
5844 // Indirect Memory Operand
5845 operand indirectNarrow(rRegN reg)
5846 %{
5847 predicate(CompressedOops::shift() == 0);
5848 constraint(ALLOC_IN_RC(ptr_reg));
5849 match(DecodeN reg);
5850
5851 format %{ "[$reg]" %}
5852 interface(MEMORY_INTER) %{
5853 base($reg);
5854 index(0x4);
5855 scale(0x0);
5856 disp(0x0);
5857 %}
5858 %}
5859
5860 // Indirect Memory Plus Short Offset Operand
5861 operand indOffset8Narrow(rRegN reg, immL8 off)
5862 %{
5863 predicate(CompressedOops::shift() == 0);
5864 constraint(ALLOC_IN_RC(ptr_reg));
5865 match(AddP (DecodeN reg) off);
5866
5867 format %{ "[$reg + $off (8-bit)]" %}
5868 interface(MEMORY_INTER) %{
5869 base($reg);
5870 index(0x4);
5871 scale(0x0);
5872 disp($off);
5873 %}
5874 %}
5875
5876 // Indirect Memory Plus Long Offset Operand
5877 operand indOffset32Narrow(rRegN reg, immL32 off)
5878 %{
5879 predicate(CompressedOops::shift() == 0);
5880 constraint(ALLOC_IN_RC(ptr_reg));
5881 match(AddP (DecodeN reg) off);
5882
5883 format %{ "[$reg + $off (32-bit)]" %}
5884 interface(MEMORY_INTER) %{
5885 base($reg);
5886 index(0x4);
5887 scale(0x0);
5888 disp($off);
5889 %}
5890 %}
5891
5892 // Indirect Memory Plus Index Register Plus Offset Operand
5893 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5894 %{
5895 predicate(CompressedOops::shift() == 0);
5896 constraint(ALLOC_IN_RC(ptr_reg));
5897 match(AddP (AddP (DecodeN reg) lreg) off);
5898
5899 op_cost(10);
5900 format %{"[$reg + $off + $lreg]" %}
5901 interface(MEMORY_INTER) %{
5902 base($reg);
5903 index($lreg);
5904 scale(0x0);
5905 disp($off);
5906 %}
5907 %}
5908
// Indirect Memory Plus Index Register Operand
5910 operand indIndexNarrow(rRegN reg, rRegL lreg)
5911 %{
5912 predicate(CompressedOops::shift() == 0);
5913 constraint(ALLOC_IN_RC(ptr_reg));
5914 match(AddP (DecodeN reg) lreg);
5915
5916 op_cost(10);
5917 format %{"[$reg + $lreg]" %}
5918 interface(MEMORY_INTER) %{
5919 base($reg);
5920 index($lreg);
5921 scale(0x0);
5922 disp(0x0);
5923 %}
5924 %}
5925
5926 // Indirect Memory Times Scale Plus Index Register
5927 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5928 %{
5929 predicate(CompressedOops::shift() == 0);
5930 constraint(ALLOC_IN_RC(ptr_reg));
5931 match(AddP (DecodeN reg) (LShiftL lreg scale));
5932
5933 op_cost(10);
5934 format %{"[$reg + $lreg << $scale]" %}
5935 interface(MEMORY_INTER) %{
5936 base($reg);
5937 index($lreg);
5938 scale($scale);
5939 disp(0x0);
5940 %}
5941 %}
5942
5943 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5944 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5945 %{
5946 predicate(CompressedOops::shift() == 0);
5947 constraint(ALLOC_IN_RC(ptr_reg));
5948 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5949
5950 op_cost(10);
5951 format %{"[$reg + $off + $lreg << $scale]" %}
5952 interface(MEMORY_INTER) %{
5953 base($reg);
5954 index($lreg);
5955 scale($scale);
5956 disp($off);
5957 %}
5958 %}
5959
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5961 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5962 %{
5963 constraint(ALLOC_IN_RC(ptr_reg));
5964 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5965 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5966
5967 op_cost(10);
5968 format %{"[$reg + $off + $idx]" %}
5969 interface(MEMORY_INTER) %{
5970 base($reg);
5971 index($idx);
5972 scale(0x0);
5973 disp($off);
5974 %}
5975 %}
5976
5977 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5978 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5979 %{
5980 constraint(ALLOC_IN_RC(ptr_reg));
5981 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5982 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5983
5984 op_cost(10);
5985 format %{"[$reg + $off + $idx << $scale]" %}
5986 interface(MEMORY_INTER) %{
5987 base($reg);
5988 index($idx);
5989 scale($scale);
5990 disp($off);
5991 %}
5992 %}
5993
5994 //----------Special Memory Operands--------------------------------------------
5995 // Stack Slot Operand - This operand is used for loading and storing temporary
5996 // values on the stack where a match requires a value to
5997 // flow through memory.
5998 operand stackSlotP(sRegP reg)
5999 %{
6000 constraint(ALLOC_IN_RC(stack_slots));
6001 // No match rule because this operand is only generated in matching
6002
6003 format %{ "[$reg]" %}
6004 interface(MEMORY_INTER) %{
6005 base(0x4); // RSP
6006 index(0x4); // No Index
6007 scale(0x0); // No Scale
6008 disp($reg); // Stack Offset
6009 %}
6010 %}
6011
6012 operand stackSlotI(sRegI reg)
6013 %{
6014 constraint(ALLOC_IN_RC(stack_slots));
6015 // No match rule because this operand is only generated in matching
6016
6017 format %{ "[$reg]" %}
6018 interface(MEMORY_INTER) %{
6019 base(0x4); // RSP
6020 index(0x4); // No Index
6021 scale(0x0); // No Scale
6022 disp($reg); // Stack Offset
6023 %}
6024 %}
6025
6026 operand stackSlotF(sRegF reg)
6027 %{
6028 constraint(ALLOC_IN_RC(stack_slots));
6029 // No match rule because this operand is only generated in matching
6030
6031 format %{ "[$reg]" %}
6032 interface(MEMORY_INTER) %{
6033 base(0x4); // RSP
6034 index(0x4); // No Index
6035 scale(0x0); // No Scale
6036 disp($reg); // Stack Offset
6037 %}
6038 %}
6039
6040 operand stackSlotD(sRegD reg)
6041 %{
6042 constraint(ALLOC_IN_RC(stack_slots));
6043 // No match rule because this operand is only generated in matching
6044
6045 format %{ "[$reg]" %}
6046 interface(MEMORY_INTER) %{
6047 base(0x4); // RSP
6048 index(0x4); // No Index
6049 scale(0x0); // No Scale
6050 disp($reg); // Stack Offset
6051 %}
6052 %}
6053 operand stackSlotL(sRegL reg)
6054 %{
6055 constraint(ALLOC_IN_RC(stack_slots));
6056 // No match rule because this operand is only generated in matching
6057
6058 format %{ "[$reg]" %}
6059 interface(MEMORY_INTER) %{
6060 base(0x4); // RSP
6061 index(0x4); // No Index
6062 scale(0x0); // No Scale
6063 disp($reg); // Stack Offset
6064 %}
6065 %}
6066
6067 //----------Conditional Branch Operands----------------------------------------
6068 // Comparison Op - This is the operation of the comparison, and is limited to
6069 // the following set of codes:
6070 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6071 //
6072 // Other attributes of the comparison, such as unsignedness, are specified
6073 // by the comparison instruction that sets a condition code flags register.
6074 // That result is represented by a flags operand whose subtype is appropriate
6075 // to the unsignedness (etc.) of the comparison.
6076 //
6077 // Later, the instruction which matches both the Comparison Op (a Bool) and
6078 // the flags (produced by the Cmp) specifies the coding of the comparison op
6079 // by matching a specific subtype of Bool operand below, such as cmpOpU.
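//
// Illustrative sketch (cf. the actual jmp/jcc rules elsewhere in this file;
// treat the exact body as an assumption): a conditional branch matches the
// Bool via cmpOp and the flags via rFlagsReg, then selects the jcc condition
// from the operand's $$cmpcode encoding:
//
//   instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
//     match(If cop cr);
//     effect(USE labl);
//     ins_encode %{
//       __ jcc((Assembler::Condition)($cop$$cmpcode), *($labl$$label));
//     %}
//   %}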
6080
6081 // Comparison Code
6082 operand cmpOp()
6083 %{
6084 match(Bool);
6085
6086 format %{ "" %}
6087 interface(COND_INTER) %{
6088 equal(0x4, "e");
6089 not_equal(0x5, "ne");
6090 less(0xC, "l");
6091 greater_equal(0xD, "ge");
6092 less_equal(0xE, "le");
6093 greater(0xF, "g");
6094 overflow(0x0, "o");
6095 no_overflow(0x1, "no");
6096 %}
6097 %}
6098
6099 // Comparison Code, unsigned compare. Used by FP also, with
6100 // C2 (unordered) turned into GT or LT already. The other bits
6101 // C0 and C3 are turned into Carry & Zero flags.
6102 operand cmpOpU()
6103 %{
6104 match(Bool);
6105
6106 format %{ "" %}
6107 interface(COND_INTER) %{
6108 equal(0x4, "e");
6109 not_equal(0x5, "ne");
6110 less(0x2, "b");
6111 greater_equal(0x3, "ae");
6112 less_equal(0x6, "be");
6113 greater(0x7, "a");
6114 overflow(0x0, "o");
6115 no_overflow(0x1, "no");
6116 %}
6117 %}
6118
6119
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6123 operand cmpOpUCF() %{
6124 match(Bool);
6125 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
6126 n->as_Bool()->_test._test == BoolTest::ge ||
6127 n->as_Bool()->_test._test == BoolTest::le ||
6128 n->as_Bool()->_test._test == BoolTest::gt ||
6129 n->in(1)->in(1) == n->in(1)->in(2));
6130 format %{ "" %}
6131 interface(COND_INTER) %{
6132 equal(0xb, "np");
6133 not_equal(0xa, "p");
6134 less(0x2, "b");
6135 greater_equal(0x3, "ae");
6136 less_equal(0x6, "be");
6137 greater(0x7, "a");
6138 overflow(0x0, "o");
6139 no_overflow(0x1, "no");
6140 %}
6141 %}
6142
6143
6144 // Floating comparisons that can be fixed up with extra conditional jumps
6145 operand cmpOpUCF2() %{
6146 match(Bool);
6147 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6148 n->as_Bool()->_test._test == BoolTest::eq) &&
6149 n->in(1)->in(1) != n->in(1)->in(2));
6150 format %{ "" %}
6151 interface(COND_INTER) %{
6152 equal(0x4, "e");
6153 not_equal(0x5, "ne");
6154 less(0x2, "b");
6155 greater_equal(0x3, "ae");
6156 less_equal(0x6, "be");
6157 greater(0x7, "a");
6158 overflow(0x0, "o");
6159 no_overflow(0x1, "no");
6160 %}
6161 %}
6162
// Operands for bound floating-point register arguments
6164 operand rxmm0() %{
6165 constraint(ALLOC_IN_RC(xmm0_reg));
6166 match(VecX);
6167 format%{%}
6168 interface(REG_INTER);
6169 %}
6170
6171 // Vectors
6172
6173 // Dummy generic vector class. Should be used for all vector operands.
6174 // Replaced with vec[SDXYZ] during post-selection pass.
6175 operand vec() %{
6176 constraint(ALLOC_IN_RC(dynamic));
6177 match(VecX);
6178 match(VecY);
6179 match(VecZ);
6180 match(VecS);
6181 match(VecD);
6182
6183 format %{ %}
6184 interface(REG_INTER);
6185 %}
6186
6187 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6188 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6189 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6190 // runtime code generation via reg_class_dynamic.
6191 operand legVec() %{
6192 constraint(ALLOC_IN_RC(dynamic));
6193 match(VecX);
6194 match(VecY);
6195 match(VecZ);
6196 match(VecS);
6197 match(VecD);
6198
6199 format %{ %}
6200 interface(REG_INTER);
6201 %}
6202
6203 // Replaces vec during post-selection cleanup. See above.
6204 operand vecS() %{
6205 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6206 match(VecS);
6207
6208 format %{ %}
6209 interface(REG_INTER);
6210 %}
6211
6212 // Replaces legVec during post-selection cleanup. See above.
6213 operand legVecS() %{
6214 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6215 match(VecS);
6216
6217 format %{ %}
6218 interface(REG_INTER);
6219 %}
6220
6221 // Replaces vec during post-selection cleanup. See above.
6222 operand vecD() %{
6223 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6224 match(VecD);
6225
6226 format %{ %}
6227 interface(REG_INTER);
6228 %}
6229
6230 // Replaces legVec during post-selection cleanup. See above.
6231 operand legVecD() %{
6232 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6233 match(VecD);
6234
6235 format %{ %}
6236 interface(REG_INTER);
6237 %}
6238
6239 // Replaces vec during post-selection cleanup. See above.
6240 operand vecX() %{
6241 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6242 match(VecX);
6243
6244 format %{ %}
6245 interface(REG_INTER);
6246 %}
6247
6248 // Replaces legVec during post-selection cleanup. See above.
6249 operand legVecX() %{
6250 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6251 match(VecX);
6252
6253 format %{ %}
6254 interface(REG_INTER);
6255 %}
6256
6257 // Replaces vec during post-selection cleanup. See above.
6258 operand vecY() %{
6259 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6260 match(VecY);
6261
6262 format %{ %}
6263 interface(REG_INTER);
6264 %}
6265
6266 // Replaces legVec during post-selection cleanup. See above.
6267 operand legVecY() %{
6268 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6269 match(VecY);
6270
6271 format %{ %}
6272 interface(REG_INTER);
6273 %}
6274
6275 // Replaces vec during post-selection cleanup. See above.
6276 operand vecZ() %{
6277 constraint(ALLOC_IN_RC(vectorz_reg));
6278 match(VecZ);
6279
6280 format %{ %}
6281 interface(REG_INTER);
6282 %}
6283
6284 // Replaces legVec during post-selection cleanup. See above.
6285 operand legVecZ() %{
6286 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6287 match(VecZ);
6288
6289 format %{ %}
6290 interface(REG_INTER);
6291 %}
6292
6293 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6295 // instruction definitions by not requiring the AD writer to specify separate
6296 // instructions for every form of operand when the instruction accepts
6297 // multiple operand types with the same basic encoding and format. The classic
6298 // case of this is memory operands.
6299
6300 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6301 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6302 indCompressedOop, indCompressedOopOffset,
6303 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6304 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6305 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
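
// Illustrative sketch (loadI-style rule; the exact form here is an
// assumption): one rule written against the "memory" opclass covers every
// addressing form listed above:
//
//   instruct loadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}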
6306
6307 //----------PIPELINE-----------------------------------------------------------
6308 // Rules which define the behavior of the target architectures pipeline.
6309 pipeline %{
6310
6311 //----------ATTRIBUTES---------------------------------------------------------
6312 attributes %{
variable_size_instructions; // Instructions are variable-length
max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
instruction_unit_size = 1; // An instruction is 1 byte long
6316 instruction_fetch_unit_size = 16; // The processor fetches one line
6317 instruction_fetch_units = 1; // of 16 bytes
6318 %}
6319
6320 //----------RESOURCES----------------------------------------------------------
6321 // Resources are the functional units available to the machine
6322
6323 // Generic P2/P3 pipeline
6324 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6325 // 3 instructions decoded per cycle.
6326 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6328 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6329 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6330 BR, FPU,
6331 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6332
6333 //----------PIPELINE DESCRIPTION-----------------------------------------------
6334 // Pipeline Description specifies the stages in the machine's pipeline
6335
6336 // Generic P2/P3 pipeline
6337 pipe_desc(S0, S1, S2, S3, S4, S5);
6338
6339 //----------PIPELINE CLASSES---------------------------------------------------
6340 // Pipeline Classes describe the stages in which input and output are
6341 // referenced by the hardware pipeline.
6342
6343 // Naming convention: ialu or fpu
6344 // Then: _reg
6345 // Then: _reg if there is a 2nd register
6346 // Then: _long if it's a pair of instructions implementing a long
6347 // Then: _fat if it requires the big decoder
6348 // Or: _mem if it requires the big decoder and a memory unit.
6349
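// For example, in ialu_reg below, "dst : S4(write)" means the result is
// produced at pipeline stage S4, "dst : S3(read)" means the operand is
// consumed at stage S3, and "DECODE : S0" claims any decoder slot at S0.
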
6350 // Integer ALU reg operation
6351 pipe_class ialu_reg(rRegI dst)
6352 %{
6353 single_instruction;
6354 dst : S4(write);
6355 dst : S3(read);
6356 DECODE : S0; // any decoder
6357 ALU : S3; // any alu
6358 %}
6359
6360 // Long ALU reg operation
6361 pipe_class ialu_reg_long(rRegL dst)
6362 %{
6363 instruction_count(2);
6364 dst : S4(write);
6365 dst : S3(read);
6366 DECODE : S0(2); // any 2 decoders
6367 ALU : S3(2); // both alus
6368 %}
6369
6370 // Integer ALU reg operation using big decoder
6371 pipe_class ialu_reg_fat(rRegI dst)
6372 %{
6373 single_instruction;
6374 dst : S4(write);
6375 dst : S3(read);
6376 D0 : S0; // big decoder only
6377 ALU : S3; // any alu
6378 %}
6379
6380 // Integer ALU reg-reg operation
6381 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6382 %{
6383 single_instruction;
6384 dst : S4(write);
6385 src : S3(read);
6386 DECODE : S0; // any decoder
6387 ALU : S3; // any alu
6388 %}
6389
6390 // Integer ALU reg-reg operation
6391 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6392 %{
6393 single_instruction;
6394 dst : S4(write);
6395 src : S3(read);
6396 D0 : S0; // big decoder only
6397 ALU : S3; // any alu
6398 %}
6399
6400 // Integer ALU reg-mem operation
6401 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6402 %{
6403 single_instruction;
6404 dst : S5(write);
6405 mem : S3(read);
6406 D0 : S0; // big decoder only
6407 ALU : S4; // any alu
6408 MEM : S3; // any mem
6409 %}
6410
6411 // Integer mem operation (prefetch)
6412 pipe_class ialu_mem(memory mem)
6413 %{
6414 single_instruction;
6415 mem : S3(read);
6416 D0 : S0; // big decoder only
6417 MEM : S3; // any mem
6418 %}
6419
6420 // Integer Store to Memory
6421 pipe_class ialu_mem_reg(memory mem, rRegI src)
6422 %{
6423 single_instruction;
6424 mem : S3(read);
6425 src : S5(read);
6426 D0 : S0; // big decoder only
6427 ALU : S4; // any alu
6428 MEM : S3;
6429 %}
6430
6431 // // Long Store to Memory
6432 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6433 // %{
6434 // instruction_count(2);
6435 // mem : S3(read);
6436 // src : S5(read);
6437 // D0 : S0(2); // big decoder only; twice
6438 // ALU : S4(2); // any 2 alus
6439 // MEM : S3(2); // Both mems
6440 // %}
6441
6442 // Integer Store to Memory
6443 pipe_class ialu_mem_imm(memory mem)
6444 %{
6445 single_instruction;
6446 mem : S3(read);
6447 D0 : S0; // big decoder only
6448 ALU : S4; // any alu
6449 MEM : S3;
6450 %}
6451
6452 // Integer ALU0 reg-reg operation
6453 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6454 %{
6455 single_instruction;
6456 dst : S4(write);
6457 src : S3(read);
6458 D0 : S0; // Big decoder only
6459 ALU0 : S3; // only alu0
6460 %}
6461
6462 // Integer ALU0 reg-mem operation
6463 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6464 %{
6465 single_instruction;
6466 dst : S5(write);
6467 mem : S3(read);
6468 D0 : S0; // big decoder only
6469 ALU0 : S4; // ALU0 only
6470 MEM : S3; // any mem
6471 %}
6472
6473 // Integer ALU reg-reg operation
6474 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6475 %{
6476 single_instruction;
6477 cr : S4(write);
6478 src1 : S3(read);
6479 src2 : S3(read);
6480 DECODE : S0; // any decoder
6481 ALU : S3; // any alu
6482 %}
6483
6484 // Integer ALU reg-imm operation
6485 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6486 %{
6487 single_instruction;
6488 cr : S4(write);
6489 src1 : S3(read);
6490 DECODE : S0; // any decoder
6491 ALU : S3; // any alu
6492 %}
6493
6494 // Integer ALU reg-mem operation
6495 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6496 %{
6497 single_instruction;
6498 cr : S4(write);
6499 src1 : S3(read);
6500 src2 : S3(read);
6501 D0 : S0; // big decoder only
6502 ALU : S4; // any alu
6503 MEM : S3;
6504 %}
6505
6506 // Conditional move reg-reg
6507 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6508 %{
6509 instruction_count(4);
6510 y : S4(read);
6511 q : S3(read);
6512 p : S3(read);
6513 DECODE : S0(4); // any decoder
6514 %}
6515
6516 // Conditional move reg-reg
6517 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6518 %{
6519 single_instruction;
6520 dst : S4(write);
6521 src : S3(read);
6522 cr : S3(read);
6523 DECODE : S0; // any decoder
6524 %}
6525
6526 // Conditional move reg-mem
6527 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6528 %{
6529 single_instruction;
6530 dst : S4(write);
6531 src : S3(read);
6532 cr : S3(read);
6533 DECODE : S0; // any decoder
6534 MEM : S3;
6535 %}
6536
6537 // Conditional move reg-reg long
6538 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6539 %{
6540 single_instruction;
6541 dst : S4(write);
6542 src : S3(read);
6543 cr : S3(read);
6544 DECODE : S0(2); // any 2 decoders
6545 %}
6546
6547 // Float reg-reg operation
6548 pipe_class fpu_reg(regD dst)
6549 %{
6550 instruction_count(2);
6551 dst : S3(read);
6552 DECODE : S0(2); // any 2 decoders
6553 FPU : S3;
6554 %}
6555
6556 // Float reg-reg operation
6557 pipe_class fpu_reg_reg(regD dst, regD src)
6558 %{
6559 instruction_count(2);
6560 dst : S4(write);
6561 src : S3(read);
6562 DECODE : S0(2); // any 2 decoders
6563 FPU : S3;
6564 %}
6565
6566 // Float reg-reg operation
6567 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6568 %{
6569 instruction_count(3);
6570 dst : S4(write);
6571 src1 : S3(read);
6572 src2 : S3(read);
6573 DECODE : S0(3); // any 3 decoders
6574 FPU : S3(2);
6575 %}
6576
6577 // Float reg-reg operation
6578 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6579 %{
6580 instruction_count(4);
6581 dst : S4(write);
6582 src1 : S3(read);
6583 src2 : S3(read);
6584 src3 : S3(read);
6585 DECODE : S0(4); // any 4 decoders
6586 FPU : S3(2);
6587 %}
6588
6589 // Float reg-reg operation with one memory operand
6590 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6591 %{
6592 instruction_count(4);
6593 dst : S4(write);
6594 src1 : S3(read);
6595 src2 : S3(read);
6596 src3 : S3(read);
6597 DECODE : S1(3); // any 3 decoders
6598 D0 : S0; // Big decoder only
6599 FPU : S3(2);
6600 MEM : S3;
6601 %}
6602
6603 // Float reg-mem operation
6604 pipe_class fpu_reg_mem(regD dst, memory mem)
6605 %{
6606 instruction_count(2);
6607 dst : S5(write);
6608 mem : S3(read);
6609 D0 : S0; // big decoder only
6610 DECODE : S1; // any decoder for FPU POP
6611 FPU : S4;
6612 MEM : S3; // any mem
6613 %}
6614
6615 // Float reg-reg-mem operation
6616 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6617 %{
6618 instruction_count(3);
6619 dst : S5(write);
6620 src1 : S3(read);
6621 mem : S3(read);
6622 D0 : S0; // big decoder only
6623 DECODE : S1(2); // any 2 decoders (one for FPU POP)
6624 FPU : S4;
6625 MEM : S3; // any mem
6626 %}
6627
6628 // Float mem-reg operation
6629 pipe_class fpu_mem_reg(memory mem, regD src)
6630 %{
6631 instruction_count(2);
6632 src : S5(read);
6633 mem : S3(read);
6634 DECODE : S0; // any decoder for FPU PUSH
6635 D0 : S1; // big decoder only
6636 FPU : S4;
6637 MEM : S3; // any mem
6638 %}
6639
6640 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6641 %{
6642 instruction_count(3);
6643 src1 : S3(read);
6644 src2 : S3(read);
6645 mem : S3(read);
6646 DECODE : S0(2); // any 2 decoders (one for FPU PUSH)
6647 D0 : S1; // big decoder only
6648 FPU : S4;
6649 MEM : S3; // any mem
6650 %}
6651
6652 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6653 %{
6654 instruction_count(3);
6655 src1 : S3(read);
6656 src2 : S3(read);
6657 mem : S4(read);
6658 DECODE : S0; // any decoder for FPU PUSH
6659 D0 : S0(2); // big decoder only
6660 FPU : S4;
6661 MEM : S3(2); // any mem
6662 %}
6663
6664 pipe_class fpu_mem_mem(memory dst, memory src1)
6665 %{
6666 instruction_count(2);
6667 src1 : S3(read);
6668 dst : S4(read);
6669 D0 : S0(2); // big decoder only
6670 MEM : S3(2); // any mem
6671 %}
6672
6673 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6674 %{
6675 instruction_count(3);
6676 src1 : S3(read);
6677 src2 : S3(read);
6678 dst : S4(read);
6679 D0 : S0(3); // big decoder only
6680 FPU : S4;
6681 MEM : S3(3); // any mem
6682 %}
6683
6684 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6685 %{
6686 instruction_count(3);
6687 src1 : S4(read);
6688 mem : S4(read);
6689 DECODE : S0; // any decoder for FPU PUSH
6690 D0 : S0(2); // big decoder only
6691 FPU : S4;
6692 MEM : S3(2); // any mem
6693 %}
6694
6695 // Float load constant
6696 pipe_class fpu_reg_con(regD dst)
6697 %{
6698 instruction_count(2);
6699 dst : S5(write);
6700 D0 : S0; // big decoder only for the load
6701 DECODE : S1; // any decoder for FPU POP
6702 FPU : S4;
6703 MEM : S3; // any mem
6704 %}
6705
6706 // Float load constant
6707 pipe_class fpu_reg_reg_con(regD dst, regD src)
6708 %{
6709 instruction_count(3);
6710 dst : S5(write);
6711 src : S3(read);
6712 D0 : S0; // big decoder only for the load
6713 DECODE : S1(2); // any 2 decoders (one for FPU POP)
6714 FPU : S4;
6715 MEM : S3; // any mem
6716 %}
6717
6718 // Unconditional branch
6719 pipe_class pipe_jmp(label labl)
6720 %{
6721 single_instruction;
6722 BR : S3;
6723 %}
6724
6725 // Conditional branch
6726 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6727 %{
6728 single_instruction;
6729 cr : S1(read);
6730 BR : S3;
6731 %}
6732
6733 // Allocation idiom
6734 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6735 %{
6736 instruction_count(1); force_serialization;
6737 fixed_latency(6);
6738 heap_ptr : S3(read);
6739 DECODE : S0(3);
6740 D0 : S2;
6741 MEM : S3;
6742 ALU : S3(2);
6743 dst : S5(write);
6744 BR : S5;
6745 %}
6746
6747 // Generic big/slow expanded idiom
6748 pipe_class pipe_slow()
6749 %{
6750 instruction_count(10); multiple_bundles; force_serialization;
6751 fixed_latency(100);
6752 D0 : S0(2);
6753 MEM : S3(2);
6754 %}
6755
6756 // The real do-nothing guy
6757 pipe_class empty()
6758 %{
6759 instruction_count(0);
6760 %}
6761
6762 // Define the class for the Nop node
6763 define
6764 %{
6765 MachNop = empty;
6766 %}
6767
6768 %}
6769
6770 //----------INSTRUCTIONS-------------------------------------------------------
6771 //
6772 // match -- States which machine-independent subtree may be replaced
6773 // by this instruction.
6774 // ins_cost -- The estimated cost of this instruction is used by instruction
6775 // selection to identify a minimum cost tree of machine
6776 // instructions that matches a tree of machine-independent
6777 // instructions.
6778 // format -- A string providing the disassembly for this instruction.
6779 // The value of an instruction's operand may be inserted
6780 // by referring to it with a '$' prefix.
6781 // opcode -- Three instruction opcodes may be provided. These are referred
6782 // to within an encode class as $primary, $secondary, and $tertiary
6783 // respectively. The primary opcode is commonly used to
6784 // indicate the type of machine instruction, while secondary
6785 // and tertiary are often used for prefix options or addressing
6786 // modes.
6787 // ins_encode -- A list of encode classes with parameters. The encode class
6788 // name must have been defined in an 'enc_class' specification
6789 // in the encode section of the architecture description.
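//
// For example, a minimal rule combining these pieces might look like the
// following sketch. It is illustrative only and not one of the rules
// defined in this file:
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));   // replace the ideal AddI subtree
//     effect(KILL cr);                 // addl clobbers the condition codes
//     format %{ "addl    $dst, $src\t# int" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);          // a pipeline class (assumed declared
//                                      // in the pipeline section)
//   %}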
6790
6791 // ============================================================================
6792
6793 instruct ShouldNotReachHere() %{
6794 match(Halt);
6795 format %{ "stop\t# ShouldNotReachHere" %}
6796 ins_encode %{
6797 if (is_reachable()) {
6798 const char* str = __ code_string(_halt_reason);
6799 __ stop(str);
6800 }
6801 %}
6802 ins_pipe(pipe_slow);
6803 %}
6804
6805 // ============================================================================
6806
6807 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
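// Each encoding below deliberately calls ShouldNotReachHere(): these nodes
// exist only to satisfy register-class matching and are eliminated before
// any code is emitted.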
6808 // Load Float
6809 instruct MoveF2VL(vlRegF dst, regF src) %{
6810 match(Set dst src);
6811 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6812 ins_encode %{
6813 ShouldNotReachHere();
6814 %}
6815 ins_pipe( fpu_reg_reg );
6816 %}
6817
6818 // Load Float
6819 instruct MoveF2LEG(legRegF dst, regF src) %{
6820 match(Set dst src);
6821 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6822 ins_encode %{
6823 ShouldNotReachHere();
6824 %}
6825 ins_pipe( fpu_reg_reg );
6826 %}
6827
6828 // Load Float
6829 instruct MoveVL2F(regF dst, vlRegF src) %{
6830 match(Set dst src);
6831 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6832 ins_encode %{
6833 ShouldNotReachHere();
6834 %}
6835 ins_pipe( fpu_reg_reg );
6836 %}
6837
6838 // Load Float
6839 instruct MoveLEG2F(regF dst, legRegF src) %{
6840 match(Set dst src);
6841 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6842 ins_encode %{
6843 ShouldNotReachHere();
6844 %}
6845 ins_pipe( fpu_reg_reg );
6846 %}
6847
6848 // Load Double
6849 instruct MoveD2VL(vlRegD dst, regD src) %{
6850 match(Set dst src);
6851 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6852 ins_encode %{
6853 ShouldNotReachHere();
6854 %}
6855 ins_pipe( fpu_reg_reg );
6856 %}
6857
6858 // Load Double
6859 instruct MoveD2LEG(legRegD dst, regD src) %{
6860 match(Set dst src);
6861 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6862 ins_encode %{
6863 ShouldNotReachHere();
6864 %}
6865 ins_pipe( fpu_reg_reg );
6866 %}
6867
6868 // Load Double
6869 instruct MoveVL2D(regD dst, vlRegD src) %{
6870 match(Set dst src);
6871 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6872 ins_encode %{
6873 ShouldNotReachHere();
6874 %}
6875 ins_pipe( fpu_reg_reg );
6876 %}
6877
6878 // Load Double
6879 instruct MoveLEG2D(regD dst, legRegD src) %{
6880 match(Set dst src);
6881 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6882 ins_encode %{
6883 ShouldNotReachHere();
6884 %}
6885 ins_pipe( fpu_reg_reg );
6886 %}
6887
6888 //----------Load/Store/Move Instructions---------------------------------------
6889 //----------Load Instructions--------------------------------------------------
6890
6891 // Load Byte (8 bit signed)
6892 instruct loadB(rRegI dst, memory mem)
6893 %{
6894 match(Set dst (LoadB mem));
6895
6896 ins_cost(125);
6897 format %{ "movsbl $dst, $mem\t# byte" %}
6898
6899 ins_encode %{
6900 __ movsbl($dst$$Register, $mem$$Address);
6901 %}
6902
6903 ins_pipe(ialu_reg_mem);
6904 %}
6905
6906 // Load Byte (8 bit signed) into Long Register
6907 instruct loadB2L(rRegL dst, memory mem)
6908 %{
6909 match(Set dst (ConvI2L (LoadB mem)));
6910
6911 ins_cost(125);
6912 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6913
6914 ins_encode %{
6915 __ movsbq($dst$$Register, $mem$$Address);
6916 %}
6917
6918 ins_pipe(ialu_reg_mem);
6919 %}
6920
6921 // Load Unsigned Byte (8 bit UNsigned)
6922 instruct loadUB(rRegI dst, memory mem)
6923 %{
6924 match(Set dst (LoadUB mem));
6925
6926 ins_cost(125);
6927 format %{ "movzbl $dst, $mem\t# ubyte" %}
6928
6929 ins_encode %{
6930 __ movzbl($dst$$Register, $mem$$Address);
6931 %}
6932
6933 ins_pipe(ialu_reg_mem);
6934 %}
6935
6936 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6937 instruct loadUB2L(rRegL dst, memory mem)
6938 %{
6939 match(Set dst (ConvI2L (LoadUB mem)));
6940
6941 ins_cost(125);
6942 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6943
6944 ins_encode %{
6945 __ movzbq($dst$$Register, $mem$$Address);
6946 %}
6947
6948 ins_pipe(ialu_reg_mem);
6949 %}
6950
6951 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6952 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6953 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6954 effect(KILL cr);
6955
6956 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6957 "andl $dst, right_n_bits($mask, 8)" %}
6958 ins_encode %{
6959 Register Rdst = $dst$$Register;
6960 __ movzbq(Rdst, $mem$$Address);
6961 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6962 %}
6963 ins_pipe(ialu_reg_mem);
6964 %}
6965
6966 // Load Short (16 bit signed)
6967 instruct loadS(rRegI dst, memory mem)
6968 %{
6969 match(Set dst (LoadS mem));
6970
6971 ins_cost(125);
6972 format %{ "movswl $dst, $mem\t# short" %}
6973
6974 ins_encode %{
6975 __ movswl($dst$$Register, $mem$$Address);
6976 %}
6977
6978 ins_pipe(ialu_reg_mem);
6979 %}
6980
6981 // Load Short (16 bit signed) to Byte (8 bit signed)
6982 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6983 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6984
6985 ins_cost(125);
6986 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6987 ins_encode %{
6988 __ movsbl($dst$$Register, $mem$$Address);
6989 %}
6990 ins_pipe(ialu_reg_mem);
6991 %}
6992
6993 // Load Short (16 bit signed) into Long Register
6994 instruct loadS2L(rRegL dst, memory mem)
6995 %{
6996 match(Set dst (ConvI2L (LoadS mem)));
6997
6998 ins_cost(125);
6999 format %{ "movswq $dst, $mem\t# short -> long" %}
7000
7001 ins_encode %{
7002 __ movswq($dst$$Register, $mem$$Address);
7003 %}
7004
7005 ins_pipe(ialu_reg_mem);
7006 %}
7007
7008 // Load Unsigned Short/Char (16 bit UNsigned)
7009 instruct loadUS(rRegI dst, memory mem)
7010 %{
7011 match(Set dst (LoadUS mem));
7012
7013 ins_cost(125);
7014 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7015
7016 ins_encode %{
7017 __ movzwl($dst$$Register, $mem$$Address);
7018 %}
7019
7020 ins_pipe(ialu_reg_mem);
7021 %}
7022
7023 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7024 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7025 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7026
7027 ins_cost(125);
7028 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7029 ins_encode %{
7030 __ movsbl($dst$$Register, $mem$$Address);
7031 %}
7032 ins_pipe(ialu_reg_mem);
7033 %}
7034
7035 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7036 instruct loadUS2L(rRegL dst, memory mem)
7037 %{
7038 match(Set dst (ConvI2L (LoadUS mem)));
7039
7040 ins_cost(125);
7041 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7042
7043 ins_encode %{
7044 __ movzwq($dst$$Register, $mem$$Address);
7045 %}
7046
7047 ins_pipe(ialu_reg_mem);
7048 %}
7049
7050 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
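// Only the low byte survives the 0xFF mask, so a single zero-extending
// byte load (movzbq) implements the whole matched pattern.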
7051 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7052 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7053
7054 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7055 ins_encode %{
7056 __ movzbq($dst$$Register, $mem$$Address);
7057 %}
7058 ins_pipe(ialu_reg_mem);
7059 %}
7060
7061 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7062 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7063 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7064 effect(KILL cr);
7065
7066 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7067 "andl $dst, right_n_bits($mask, 16)" %}
7068 ins_encode %{
7069 Register Rdst = $dst$$Register;
7070 __ movzwq(Rdst, $mem$$Address);
7071 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7072 %}
7073 ins_pipe(ialu_reg_mem);
7074 %}
7075
7076 // Load Integer
7077 instruct loadI(rRegI dst, memory mem)
7078 %{
7079 match(Set dst (LoadI mem));
7080
7081 ins_cost(125);
7082 format %{ "movl $dst, $mem\t# int" %}
7083
7084 ins_encode %{
7085 __ movl($dst$$Register, $mem$$Address);
7086 %}
7087
7088 ins_pipe(ialu_reg_mem);
7089 %}
7090
7091 // Load Integer (32 bit signed) to Byte (8 bit signed)
7092 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7093 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7094
7095 ins_cost(125);
7096 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7097 ins_encode %{
7098 __ movsbl($dst$$Register, $mem$$Address);
7099 %}
7100 ins_pipe(ialu_reg_mem);
7101 %}
7102
7103 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7104 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7105 match(Set dst (AndI (LoadI mem) mask));
7106
7107 ins_cost(125);
7108 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7109 ins_encode %{
7110 __ movzbl($dst$$Register, $mem$$Address);
7111 %}
7112 ins_pipe(ialu_reg_mem);
7113 %}
7114
7115 // Load Integer (32 bit signed) to Short (16 bit signed)
7116 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7117 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7118
7119 ins_cost(125);
7120 format %{ "movswl $dst, $mem\t# int -> short" %}
7121 ins_encode %{
7122 __ movswl($dst$$Register, $mem$$Address);
7123 %}
7124 ins_pipe(ialu_reg_mem);
7125 %}
7126
7127 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7128 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7129 match(Set dst (AndI (LoadI mem) mask));
7130
7131 ins_cost(125);
7132 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7133 ins_encode %{
7134 __ movzwl($dst$$Register, $mem$$Address);
7135 %}
7136 ins_pipe(ialu_reg_mem);
7137 %}
7138
7139 // Load Integer into Long Register
7140 instruct loadI2L(rRegL dst, memory mem)
7141 %{
7142 match(Set dst (ConvI2L (LoadI mem)));
7143
7144 ins_cost(125);
7145 format %{ "movslq $dst, $mem\t# int -> long" %}
7146
7147 ins_encode %{
7148 __ movslq($dst$$Register, $mem$$Address);
7149 %}
7150
7151 ins_pipe(ialu_reg_mem);
7152 %}
7153
7154 // Load Integer with mask 0xFF into Long Register
7155 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7156 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7157
7158 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7159 ins_encode %{
7160 __ movzbq($dst$$Register, $mem$$Address);
7161 %}
7162 ins_pipe(ialu_reg_mem);
7163 %}
7164
7165 // Load Integer with mask 0xFFFF into Long Register
7166 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7167 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7168
7169 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7170 ins_encode %{
7171 __ movzwq($dst$$Register, $mem$$Address);
7172 %}
7173 ins_pipe(ialu_reg_mem);
7174 %}
7175
7176 // Load Integer with a 31-bit mask into Long Register
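// Restricting the mask to 31 bits keeps the andl result non-negative, so
// its implicit zero extension to 64 bits agrees with ConvI2L's sign
// extension and no extra instruction is needed.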
7177 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7178 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7179 effect(KILL cr);
7180
7181 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7182 "andl $dst, $mask" %}
7183 ins_encode %{
7184 Register Rdst = $dst$$Register;
7185 __ movl(Rdst, $mem$$Address);
7186 __ andl(Rdst, $mask$$constant);
7187 %}
7188 ins_pipe(ialu_reg_mem);
7189 %}
7190
7191 // Load Unsigned Integer into Long Register
7192 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7193 %{
7194 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7195
7196 ins_cost(125);
7197 format %{ "movl $dst, $mem\t# uint -> long" %}
7198
7199 ins_encode %{
7200 __ movl($dst$$Register, $mem$$Address);
7201 %}
7202
7203 ins_pipe(ialu_reg_mem);
7204 %}
7205
7206 // Load Long
7207 instruct loadL(rRegL dst, memory mem)
7208 %{
7209 match(Set dst (LoadL mem));
7210
7211 ins_cost(125);
7212 format %{ "movq $dst, $mem\t# long" %}
7213
7214 ins_encode %{
7215 __ movq($dst$$Register, $mem$$Address);
7216 %}
7217
7218 ins_pipe(ialu_reg_mem); // XXX
7219 %}
7220
7221 // Load Range
7222 instruct loadRange(rRegI dst, memory mem)
7223 %{
7224 match(Set dst (LoadRange mem));
7225
7226 ins_cost(125); // XXX
7227 format %{ "movl $dst, $mem\t# range" %}
7228 ins_encode %{
7229 __ movl($dst$$Register, $mem$$Address);
7230 %}
7231 ins_pipe(ialu_reg_mem);
7232 %}
7233
7234 // Load Pointer
7235 instruct loadP(rRegP dst, memory mem)
7236 %{
7237 match(Set dst (LoadP mem));
7238 predicate(n->as_Load()->barrier_data() == 0);
7239
7240 ins_cost(125); // XXX
7241 format %{ "movq $dst, $mem\t# ptr" %}
7242 ins_encode %{
7243 __ movq($dst$$Register, $mem$$Address);
7244 %}
7245 ins_pipe(ialu_reg_mem); // XXX
7246 %}
7247
7248 // Load Compressed Pointer
7249 instruct loadN(rRegN dst, memory mem)
7250 %{
7251 predicate(n->as_Load()->barrier_data() == 0);
7252 match(Set dst (LoadN mem));
7253
7254 ins_cost(125); // XXX
7255 format %{ "movl $dst, $mem\t# compressed ptr" %}
7256 ins_encode %{
7257 __ movl($dst$$Register, $mem$$Address);
7258 %}
7259 ins_pipe(ialu_reg_mem); // XXX
7260 %}
7261
7262
7263 // Load Klass Pointer
7264 instruct loadKlass(rRegP dst, memory mem)
7265 %{
7266 match(Set dst (LoadKlass mem));
7267
7268 ins_cost(125); // XXX
7269 format %{ "movq $dst, $mem\t# class" %}
7270 ins_encode %{
7271 __ movq($dst$$Register, $mem$$Address);
7272 %}
7273 ins_pipe(ialu_reg_mem); // XXX
7274 %}
7275
7276 // Load narrow Klass Pointer
7277 instruct loadNKlass(rRegN dst, memory mem)
7278 %{
7279 predicate(!UseCompactObjectHeaders);
7280 match(Set dst (LoadNKlass mem));
7281
7282 ins_cost(125); // XXX
7283 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7284 ins_encode %{
7285 __ movl($dst$$Register, $mem$$Address);
7286 %}
7287 ins_pipe(ialu_reg_mem); // XXX
7288 %}
7289
7290 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7291 %{
7292 predicate(UseCompactObjectHeaders);
7293 match(Set dst (LoadNKlass mem));
7294 effect(KILL cr);
7295 ins_cost(125);
7296 format %{
7297 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7298 "shrl $dst, markWord::klass_shift_at_offset"
7299 %}
7300 ins_encode %{
7301 if (UseAPX) {
7302 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7303 } else {
7305 __ movl($dst$$Register, $mem$$Address);
7306 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7307 }
7308 %}
7309 ins_pipe(ialu_reg_mem);
7310 %}
7311
7312 // Load Float
7313 instruct loadF(regF dst, memory mem)
7314 %{
7315 match(Set dst (LoadF mem));
7316
7317 ins_cost(145); // XXX
7318 format %{ "movss $dst, $mem\t# float" %}
7319 ins_encode %{
7320 __ movflt($dst$$XMMRegister, $mem$$Address);
7321 %}
7322 ins_pipe(pipe_slow); // XXX
7323 %}
7324
7325 // Load Double
7326 instruct loadD_partial(regD dst, memory mem)
7327 %{
7328 predicate(!UseXmmLoadAndClearUpper);
7329 match(Set dst (LoadD mem));
7330
7331 ins_cost(145); // XXX
7332 format %{ "movlpd $dst, $mem\t# double" %}
7333 ins_encode %{
7334 __ movdbl($dst$$XMMRegister, $mem$$Address);
7335 %}
7336 ins_pipe(pipe_slow); // XXX
7337 %}
7338
7339 instruct loadD(regD dst, memory mem)
7340 %{
7341 predicate(UseXmmLoadAndClearUpper);
7342 match(Set dst (LoadD mem));
7343
7344 ins_cost(145); // XXX
7345 format %{ "movsd $dst, $mem\t# double" %}
7346 ins_encode %{
7347 __ movdbl($dst$$XMMRegister, $mem$$Address);
7348 %}
7349 ins_pipe(pipe_slow); // XXX
7350 %}
7351
7352 // max = java.lang.Math.max(float a, float b)
7353 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7354 predicate(VM_Version::supports_avx10_2());
7355 match(Set dst (MaxF a b));
7356 format %{ "maxF $dst, $a, $b" %}
7357 ins_encode %{
7358 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7359 %}
7360 ins_pipe( pipe_slow );
7361 %}
7362
7363 // max = java.lang.Math.max(float a, float b)
7364 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7365 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7366 match(Set dst (MaxF a b));
7367 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7368 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7369 ins_encode %{
7370 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7371 %}
7372 ins_pipe( pipe_slow );
7373 %}
7374
7375 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7376 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7377 match(Set dst (MaxF a b));
7378 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7379
7380 format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7381 ins_encode %{
7382 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7383 false /*min*/, true /*single*/);
7384 %}
7385 ins_pipe( pipe_slow );
7386 %}
7387
7388 // max = java.lang.Math.max(double a, double b)
7389 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7390 predicate(VM_Version::supports_avx10_2());
7391 match(Set dst (MaxD a b));
7392 format %{ "maxD $dst, $a, $b" %}
7393 ins_encode %{
7394 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7395 %}
7396 ins_pipe( pipe_slow );
7397 %}
7398
7399 // max = java.lang.Math.max(double a, double b)
7400 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7401 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7402 match(Set dst (MaxD a b));
7403 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7404 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7405 ins_encode %{
7406 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7407 %}
7408 ins_pipe( pipe_slow );
7409 %}
7410
7411 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7412 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7413 match(Set dst (MaxD a b));
7414 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7415
7416 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7417 ins_encode %{
7418 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7419 false /*min*/, false /*single*/);
7420 %}
7421 ins_pipe( pipe_slow );
7422 %}
7423
7424 // min = java.lang.Math.min(float a, float b)
7425 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7426 predicate(VM_Version::supports_avx10_2());
7427 match(Set dst (MinF a b));
7428 format %{ "minF $dst, $a, $b" %}
7429 ins_encode %{
7430 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7431 %}
7432 ins_pipe( pipe_slow );
7433 %}
7434
7435 // min = java.lang.Math.min(float a, float b)
7436 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7437 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7438 match(Set dst (MinF a b));
7439 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7440 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7441 ins_encode %{
7442 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7443 %}
7444 ins_pipe( pipe_slow );
7445 %}
7446
7447 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7448 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7449 match(Set dst (MinF a b));
7450 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7451
7452 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7453 ins_encode %{
7454 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7455 true /*min*/, true /*single*/);
7456 %}
7457 ins_pipe( pipe_slow );
7458 %}
7459
7460 // min = java.lang.Math.min(double a, double b)
7461 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7462 predicate(VM_Version::supports_avx10_2());
7463 match(Set dst (MinD a b));
7464 format %{ "minD $dst, $a, $b" %}
7465 ins_encode %{
7466 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7467 %}
7468 ins_pipe( pipe_slow );
7469 %}
7470
7471 // min = java.lang.Math.min(double a, double b)
7472 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7473 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7474 match(Set dst (MinD a b));
7475 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7476 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7477 ins_encode %{
7478 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7479 %}
7480 ins_pipe( pipe_slow );
7481 %}
7482
7483 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7484 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7485 match(Set dst (MinD a b));
7486 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7487
7488 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7489 ins_encode %{
7490 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7491 true /*min*/, false /*single*/);
7492 %}
7493 ins_pipe( pipe_slow );
7494 %}
7495
7496 // Load Effective Address
7497 instruct leaP8(rRegP dst, indOffset8 mem)
7498 %{
7499 match(Set dst mem);
7500
7501 ins_cost(110); // XXX
7502 format %{ "leaq $dst, $mem\t# ptr 8" %}
7503 ins_encode %{
7504 __ leaq($dst$$Register, $mem$$Address);
7505 %}
7506 ins_pipe(ialu_reg_reg_fat);
7507 %}
7508
7509 instruct leaP32(rRegP dst, indOffset32 mem)
7510 %{
7511 match(Set dst mem);
7512
7513 ins_cost(110);
7514 format %{ "leaq $dst, $mem\t# ptr 32" %}
7515 ins_encode %{
7516 __ leaq($dst$$Register, $mem$$Address);
7517 %}
7518 ins_pipe(ialu_reg_reg_fat);
7519 %}
7520
7521 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7522 %{
7523 match(Set dst mem);
7524
7525 ins_cost(110);
7526 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7527 ins_encode %{
7528 __ leaq($dst$$Register, $mem$$Address);
7529 %}
7530 ins_pipe(ialu_reg_reg_fat);
7531 %}
7532
7533 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7534 %{
7535 match(Set dst mem);
7536
7537 ins_cost(110);
7538 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7539 ins_encode %{
7540 __ leaq($dst$$Register, $mem$$Address);
7541 %}
7542 ins_pipe(ialu_reg_reg_fat);
7543 %}
7544
7545 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7546 %{
7547 match(Set dst mem);
7548
7549 ins_cost(110);
7550 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7551 ins_encode %{
7552 __ leaq($dst$$Register, $mem$$Address);
7553 %}
7554 ins_pipe(ialu_reg_reg_fat);
7555 %}
7556
7557 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7558 %{
7559 match(Set dst mem);
7560
7561 ins_cost(110);
7562 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7563 ins_encode %{
7564 __ leaq($dst$$Register, $mem$$Address);
7565 %}
7566 ins_pipe(ialu_reg_reg_fat);
7567 %}
7568
7569 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7570 %{
7571 match(Set dst mem);
7572
7573 ins_cost(110);
7574 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7575 ins_encode %{
7576 __ leaq($dst$$Register, $mem$$Address);
7577 %}
7578 ins_pipe(ialu_reg_reg_fat);
7579 %}
7580
7581 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7582 %{
7583 match(Set dst mem);
7584
7585 ins_cost(110);
7586 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7587 ins_encode %{
7588 __ leaq($dst$$Register, $mem$$Address);
7589 %}
7590 ins_pipe(ialu_reg_reg_fat);
7591 %}
7592
7593 // Load Effective Address which uses a narrow (32-bit) oop
7594 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7595 %{
7596 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7597 match(Set dst mem);
7598
7599 ins_cost(110);
7600 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7601 ins_encode %{
7602 __ leaq($dst$$Register, $mem$$Address);
7603 %}
7604 ins_pipe(ialu_reg_reg_fat);
7605 %}
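// The *Narrow variants below apply when CompressedOops::shift() == 0, where
// a compressed oop needs no shift to decode, so leaq can fold the decode
// into the address computation (given a heap layout that makes the narrow
// operand forms legal).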
7606
7607 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7608 %{
7609 predicate(CompressedOops::shift() == 0);
7610 match(Set dst mem);
7611
7612 ins_cost(110); // XXX
7613 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7614 ins_encode %{
7615 __ leaq($dst$$Register, $mem$$Address);
7616 %}
7617 ins_pipe(ialu_reg_reg_fat);
7618 %}
7619
7620 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7621 %{
7622 predicate(CompressedOops::shift() == 0);
7623 match(Set dst mem);
7624
7625 ins_cost(110);
7626 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7627 ins_encode %{
7628 __ leaq($dst$$Register, $mem$$Address);
7629 %}
7630 ins_pipe(ialu_reg_reg_fat);
7631 %}
7632
7633 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7634 %{
7635 predicate(CompressedOops::shift() == 0);
7636 match(Set dst mem);
7637
7638 ins_cost(110);
7639 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7640 ins_encode %{
7641 __ leaq($dst$$Register, $mem$$Address);
7642 %}
7643 ins_pipe(ialu_reg_reg_fat);
7644 %}
7645
7646 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7647 %{
7648 predicate(CompressedOops::shift() == 0);
7649 match(Set dst mem);
7650
7651 ins_cost(110);
7652 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7653 ins_encode %{
7654 __ leaq($dst$$Register, $mem$$Address);
7655 %}
7656 ins_pipe(ialu_reg_reg_fat);
7657 %}
7658
7659 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7660 %{
7661 predicate(CompressedOops::shift() == 0);
7662 match(Set dst mem);
7663
7664 ins_cost(110);
7665 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7666 ins_encode %{
7667 __ leaq($dst$$Register, $mem$$Address);
7668 %}
7669 ins_pipe(ialu_reg_reg_fat);
7670 %}
7671
7672 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7673 %{
7674 predicate(CompressedOops::shift() == 0);
7675 match(Set dst mem);
7676
7677 ins_cost(110);
7678 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7679 ins_encode %{
7680 __ leaq($dst$$Register, $mem$$Address);
7681 %}
7682 ins_pipe(ialu_reg_reg_fat);
7683 %}
7684
7685 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7686 %{
7687 predicate(CompressedOops::shift() == 0);
7688 match(Set dst mem);
7689
7690 ins_cost(110);
7691 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7692 ins_encode %{
7693 __ leaq($dst$$Register, $mem$$Address);
7694 %}
7695 ins_pipe(ialu_reg_reg_fat);
7696 %}
7697
7698 instruct loadConI(rRegI dst, immI src)
7699 %{
7700 match(Set dst src);
7701
7702 format %{ "movl $dst, $src\t# int" %}
7703 ins_encode %{
7704 __ movl($dst$$Register, $src$$constant);
7705 %}
7706 ins_pipe(ialu_reg_fat); // XXX
7707 %}
7708
7709 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7710 %{
7711 match(Set dst src);
7712 effect(KILL cr);
7713
7714 ins_cost(50);
7715 format %{ "xorl $dst, $dst\t# int" %}
7716 ins_encode %{
7717 __ xorl($dst$$Register, $dst$$Register);
7718 %}
7719 ins_pipe(ialu_reg);
7720 %}
7721
7722 instruct loadConL(rRegL dst, immL src)
7723 %{
7724 match(Set dst src);
7725
7726 ins_cost(150);
7727 format %{ "movq $dst, $src\t# long" %}
7728 ins_encode %{
7729 __ mov64($dst$$Register, $src$$constant);
7730 %}
7731 ins_pipe(ialu_reg);
7732 %}
7733
7734 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7735 %{
7736 match(Set dst src);
7737 effect(KILL cr);
7738
7739 ins_cost(50);
7740 format %{ "xorl $dst, $dst\t# long" %}
7741 ins_encode %{
7742 __ xorl($dst$$Register, $dst$$Register);
7743 %}
7744 ins_pipe(ialu_reg); // XXX
7745 %}
7746
7747 instruct loadConUL32(rRegL dst, immUL32 src)
7748 %{
7749 match(Set dst src);
7750
7751 ins_cost(60);
7752 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7753 ins_encode %{
7754 __ movl($dst$$Register, $src$$constant);
7755 %}
7756 ins_pipe(ialu_reg);
7757 %}
7758
7759 instruct loadConL32(rRegL dst, immL32 src)
7760 %{
7761 match(Set dst src);
7762
7763 ins_cost(70);
7764 format %{ "movq $dst, $src\t# long (32-bit)" %}
7765 ins_encode %{
7766 __ movq($dst$$Register, $src$$constant);
7767 %}
7768 ins_pipe(ialu_reg);
7769 %}
7770
7771 instruct loadConP(rRegP dst, immP con) %{
7772 match(Set dst con);
7773
7774 format %{ "movq $dst, $con\t# ptr" %}
7775 ins_encode %{
7776 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7777 %}
7778 ins_pipe(ialu_reg_fat); // XXX
7779 %}
7780
7781 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7782 %{
7783 match(Set dst src);
7784 effect(KILL cr);
7785
7786 ins_cost(50);
7787 format %{ "xorl $dst, $dst\t# ptr" %}
7788 ins_encode %{
7789 __ xorl($dst$$Register, $dst$$Register);
7790 %}
7791 ins_pipe(ialu_reg);
7792 %}
7793
7794 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7795 %{
7796 match(Set dst src);
7797 effect(KILL cr);
7798
7799 ins_cost(60);
7800 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7801 ins_encode %{
7802 __ movl($dst$$Register, $src$$constant);
7803 %}
7804 ins_pipe(ialu_reg);
7805 %}
7806
7807 instruct loadConF(regF dst, immF con) %{
7808 match(Set dst con);
7809 ins_cost(125);
7810 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7811 ins_encode %{
7812 __ movflt($dst$$XMMRegister, $constantaddress($con));
7813 %}
7814 ins_pipe(pipe_slow);
7815 %}
7816
7817 instruct loadConH(regF dst, immH con) %{
7818 match(Set dst con);
7819 ins_cost(125);
7820 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7821 ins_encode %{
7822 __ movflt($dst$$XMMRegister, $constantaddress($con));
7823 %}
7824 ins_pipe(pipe_slow);
7825 %}
7826
7827 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7828 match(Set dst src);
7829 effect(KILL cr);
7830 format %{ "xorq $dst, $src\t# compressed null pointer" %}
7831 ins_encode %{
7832 __ xorq($dst$$Register, $dst$$Register);
7833 %}
7834 ins_pipe(ialu_reg);
7835 %}
7836
7837 instruct loadConN(rRegN dst, immN src) %{
7838 match(Set dst src);
7839
7840 ins_cost(125);
7841 format %{ "movl $dst, $src\t# compressed ptr" %}
7842 ins_encode %{
7843 address con = (address)$src$$constant;
7844 if (con == nullptr) {
7845 ShouldNotReachHere();
7846 } else {
7847 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7848 }
7849 %}
7850 ins_pipe(ialu_reg_fat); // XXX
7851 %}
7852
7853 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7854 match(Set dst src);
7855
7856 ins_cost(125);
7857 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7858 ins_encode %{
7859 address con = (address)$src$$constant;
7860 if (con == nullptr) {
7861 ShouldNotReachHere();
7862 } else {
7863 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7864 }
7865 %}
7866 ins_pipe(ialu_reg_fat); // XXX
7867 %}
7868
7869 instruct loadConF0(regF dst, immF0 src)
7870 %{
7871 match(Set dst src);
7872 ins_cost(100);
7873
7874 format %{ "xorps $dst, $dst\t# float 0.0" %}
7875 ins_encode %{
7876 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7877 %}
7878 ins_pipe(pipe_slow);
7879 %}
7880
7881 // Use the same format since predicate() cannot be used here.
7882 instruct loadConD(regD dst, immD con) %{
7883 match(Set dst con);
7884 ins_cost(125);
7885 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7886 ins_encode %{
7887 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7888 %}
7889 ins_pipe(pipe_slow);
7890 %}
7891
7892 instruct loadConD0(regD dst, immD0 src)
7893 %{
7894 match(Set dst src);
7895 ins_cost(100);
7896
7897 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7898 ins_encode %{
7899 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7900 %}
7901 ins_pipe(pipe_slow);
7902 %}
7903
7904 instruct loadSSI(rRegI dst, stackSlotI src)
7905 %{
7906 match(Set dst src);
7907
7908 ins_cost(125);
7909 format %{ "movl $dst, $src\t# int stk" %}
7910 ins_encode %{
7911 __ movl($dst$$Register, $src$$Address);
7912 %}
7913 ins_pipe(ialu_reg_mem);
7914 %}
7915
7916 instruct loadSSL(rRegL dst, stackSlotL src)
7917 %{
7918 match(Set dst src);
7919
7920 ins_cost(125);
7921 format %{ "movq $dst, $src\t# long stk" %}
7922 ins_encode %{
7923 __ movq($dst$$Register, $src$$Address);
7924 %}
7925 ins_pipe(ialu_reg_mem);
7926 %}
7927
7928 instruct loadSSP(rRegP dst, stackSlotP src)
7929 %{
7930 match(Set dst src);
7931
7932 ins_cost(125);
7933 format %{ "movq $dst, $src\t# ptr stk" %}
7934 ins_encode %{
7935 __ movq($dst$$Register, $src$$Address);
7936 %}
7937 ins_pipe(ialu_reg_mem);
7938 %}
7939
7940 instruct loadSSF(regF dst, stackSlotF src)
7941 %{
7942 match(Set dst src);
7943
7944 ins_cost(125);
7945 format %{ "movss $dst, $src\t# float stk" %}
7946 ins_encode %{
7947 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7948 %}
7949 ins_pipe(pipe_slow); // XXX
7950 %}
7951
7952 // Use the same format since predicate() cannot be used here.
7953 instruct loadSSD(regD dst, stackSlotD src)
7954 %{
7955 match(Set dst src);
7956
7957 ins_cost(125);
7958 format %{ "movsd $dst, $src\t# double stk" %}
7959 ins_encode %{
7960 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7961 %}
7962 ins_pipe(pipe_slow); // XXX
7963 %}
7964
7965 // Prefetch instructions for allocation.
7966 // Must be safe to execute with invalid address (cannot fault).
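// AllocatePrefetchInstr selects the flavor, as the predicates below show:
// 0 = PREFETCHNTA, 1 = PREFETCHT0, 2 = PREFETCHT2, 3 = PREFETCHW.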
7967
7968 instruct prefetchAlloc( memory mem ) %{
7969 predicate(AllocatePrefetchInstr==3);
7970 match(PrefetchAllocation mem);
7971 ins_cost(125);
7972
7973 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7974 ins_encode %{
7975 __ prefetchw($mem$$Address);
7976 %}
7977 ins_pipe(ialu_mem);
7978 %}
7979
7980 instruct prefetchAllocNTA( memory mem ) %{
7981 predicate(AllocatePrefetchInstr==0);
7982 match(PrefetchAllocation mem);
7983 ins_cost(125);
7984
7985 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7986 ins_encode %{
7987 __ prefetchnta($mem$$Address);
7988 %}
7989 ins_pipe(ialu_mem);
7990 %}
7991
7992 instruct prefetchAllocT0( memory mem ) %{
7993 predicate(AllocatePrefetchInstr==1);
7994 match(PrefetchAllocation mem);
7995 ins_cost(125);
7996
7997 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7998 ins_encode %{
7999 __ prefetcht0($mem$$Address);
8000 %}
8001 ins_pipe(ialu_mem);
8002 %}
8003
8004 instruct prefetchAllocT2( memory mem ) %{
8005 predicate(AllocatePrefetchInstr==2);
8006 match(PrefetchAllocation mem);
8007 ins_cost(125);
8008
8009 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8010 ins_encode %{
8011 __ prefetcht2($mem$$Address);
8012 %}
8013 ins_pipe(ialu_mem);
8014 %}
8015
8016 //----------Store Instructions-------------------------------------------------
8017
8018 // Store Byte
8019 instruct storeB(memory mem, rRegI src)
8020 %{
8021 match(Set mem (StoreB mem src));
8022
8023 ins_cost(125); // XXX
8024 format %{ "movb $mem, $src\t# byte" %}
8025 ins_encode %{
8026 __ movb($mem$$Address, $src$$Register);
8027 %}
8028 ins_pipe(ialu_mem_reg);
8029 %}
8030
8031 // Store Char/Short
8032 instruct storeC(memory mem, rRegI src)
8033 %{
8034 match(Set mem (StoreC mem src));
8035
8036 ins_cost(125); // XXX
8037 format %{ "movw $mem, $src\t# char/short" %}
8038 ins_encode %{
8039 __ movw($mem$$Address, $src$$Register);
8040 %}
8041 ins_pipe(ialu_mem_reg);
8042 %}
8043
8044 // Store Integer
8045 instruct storeI(memory mem, rRegI src)
8046 %{
8047 match(Set mem (StoreI mem src));
8048
8049 ins_cost(125); // XXX
8050 format %{ "movl $mem, $src\t# int" %}
8051 ins_encode %{
8052 __ movl($mem$$Address, $src$$Register);
8053 %}
8054 ins_pipe(ialu_mem_reg);
8055 %}
8056
8057 // Store Long
8058 instruct storeL(memory mem, rRegL src)
8059 %{
8060 match(Set mem (StoreL mem src));
8061
8062 ins_cost(125); // XXX
8063 format %{ "movq $mem, $src\t# long" %}
8064 ins_encode %{
8065 __ movq($mem$$Address, $src$$Register);
8066 %}
8067 ins_pipe(ialu_mem_reg); // XXX
8068 %}
8069
8070 // Store Pointer
8071 instruct storeP(memory mem, any_RegP src)
8072 %{
8073 predicate(n->as_Store()->barrier_data() == 0);
8074 match(Set mem (StoreP mem src));
8075
8076 ins_cost(125); // XXX
8077 format %{ "movq $mem, $src\t# ptr" %}
8078 ins_encode %{
8079 __ movq($mem$$Address, $src$$Register);
8080 %}
8081 ins_pipe(ialu_mem_reg);
8082 %}
8083
8084 instruct storeImmP0(memory mem, immP0 zero)
8085 %{
8086 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8087 match(Set mem (StoreP mem zero));
8088
8089 ins_cost(125); // XXX
8090 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8091 ins_encode %{
8092 __ movq($mem$$Address, r12);
8093 %}
8094 ins_pipe(ialu_mem_reg);
8095 %}
8096
8097 // Store Null Pointer, mark word, or other simple pointer constant.
8098 instruct storeImmP(memory mem, immP31 src)
8099 %{
8100 predicate(n->as_Store()->barrier_data() == 0);
8101 match(Set mem (StoreP mem src));
8102
8103 ins_cost(150); // XXX
8104 format %{ "movq $mem, $src\t# ptr" %}
8105 ins_encode %{
8106 __ movq($mem$$Address, $src$$constant);
8107 %}
8108 ins_pipe(ialu_mem_imm);
8109 %}
8110
8111 // Store Compressed Pointer
8112 instruct storeN(memory mem, rRegN src)
8113 %{
8114 predicate(n->as_Store()->barrier_data() == 0);
8115 match(Set mem (StoreN mem src));
8116
8117 ins_cost(125); // XXX
8118 format %{ "movl $mem, $src\t# compressed ptr" %}
8119 ins_encode %{
8120 __ movl($mem$$Address, $src$$Register);
8121 %}
8122 ins_pipe(ialu_mem_reg);
8123 %}
8124
8125 instruct storeNKlass(memory mem, rRegN src)
8126 %{
8127 match(Set mem (StoreNKlass mem src));
8128
8129 ins_cost(125); // XXX
8130 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8131 ins_encode %{
8132 __ movl($mem$$Address, $src$$Register);
8133 %}
8134 ins_pipe(ialu_mem_reg);
8135 %}
8136
8137 instruct storeImmN0(memory mem, immN0 zero)
8138 %{
8139 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8140 match(Set mem (StoreN mem zero));
8141
8142 ins_cost(125); // XXX
8143 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8144 ins_encode %{
8145 __ movl($mem$$Address, r12);
8146 %}
8147 ins_pipe(ialu_mem_reg);
8148 %}
8149
8150 instruct storeImmN(memory mem, immN src)
8151 %{
8152 predicate(n->as_Store()->barrier_data() == 0);
8153 match(Set mem (StoreN mem src));
8154
8155 ins_cost(150); // XXX
8156 format %{ "movl $mem, $src\t# compressed ptr" %}
8157 ins_encode %{
8158 address con = (address)$src$$constant;
8159 if (con == nullptr) {
8160 __ movl($mem$$Address, 0);
8161 } else {
8162 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8163 }
8164 %}
8165 ins_pipe(ialu_mem_imm);
8166 %}
8167
8168 instruct storeImmNKlass(memory mem, immNKlass src)
8169 %{
8170 match(Set mem (StoreNKlass mem src));
8171
8172 ins_cost(150); // XXX
8173 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8174 ins_encode %{
8175 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8176 %}
8177 ins_pipe(ialu_mem_imm);
8178 %}
8179
8180 // Store Integer Immediate
8181 instruct storeImmI0(memory mem, immI_0 zero)
8182 %{
8183 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8184 match(Set mem (StoreI mem zero));
8185
8186 ins_cost(125); // XXX
8187 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8188 ins_encode %{
8189 __ movl($mem$$Address, r12);
8190 %}
8191 ins_pipe(ialu_mem_reg);
8192 %}
8193
8194 instruct storeImmI(memory mem, immI src)
8195 %{
8196 match(Set mem (StoreI mem src));
8197
8198 ins_cost(150);
8199 format %{ "movl $mem, $src\t# int" %}
8200 ins_encode %{
8201 __ movl($mem$$Address, $src$$constant);
8202 %}
8203 ins_pipe(ialu_mem_imm);
8204 %}
8205
8206 // Store Long Immediate
8207 instruct storeImmL0(memory mem, immL0 zero)
8208 %{
8209 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8210 match(Set mem (StoreL mem zero));
8211
8212 ins_cost(125); // XXX
8213 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8214 ins_encode %{
8215 __ movq($mem$$Address, r12);
8216 %}
8217 ins_pipe(ialu_mem_reg);
8218 %}
8219
8220 instruct storeImmL(memory mem, immL32 src)
8221 %{
8222 match(Set mem (StoreL mem src));
8223
8224 ins_cost(150);
8225 format %{ "movq $mem, $src\t# long" %}
8226 ins_encode %{
8227 __ movq($mem$$Address, $src$$constant);
8228 %}
8229 ins_pipe(ialu_mem_imm);
8230 %}
8231
8232 // Store Short/Char Immediate
8233 instruct storeImmC0(memory mem, immI_0 zero)
8234 %{
8235 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8236 match(Set mem (StoreC mem zero));
8237
8238 ins_cost(125); // XXX
8239 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8240 ins_encode %{
8241 __ movw($mem$$Address, r12);
8242 %}
8243 ins_pipe(ialu_mem_reg);
8244 %}
8245
8246 instruct storeImmI16(memory mem, immI16 src)
8247 %{
8248 predicate(UseStoreImmI16);
8249 match(Set mem (StoreC mem src));
8250
8251 ins_cost(150);
8252 format %{ "movw $mem, $src\t# short/char" %}
8253 ins_encode %{
8254 __ movw($mem$$Address, $src$$constant);
8255 %}
8256 ins_pipe(ialu_mem_imm);
8257 %}
8258
8259 // Store Byte Immediate
8260 instruct storeImmB0(memory mem, immI_0 zero)
8261 %{
8262 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8263 match(Set mem (StoreB mem zero));
8264
8265 ins_cost(125); // XXX
8266 format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %}
8267 ins_encode %{
8268 __ movb($mem$$Address, r12);
8269 %}
8270 ins_pipe(ialu_mem_reg);
8271 %}
8272
8273 instruct storeImmB(memory mem, immI8 src)
8274 %{
8275 match(Set mem (StoreB mem src));
8276
8277 ins_cost(150); // XXX
8278 format %{ "movb $mem, $src\t# byte" %}
8279 ins_encode %{
8280 __ movb($mem$$Address, $src$$constant);
8281 %}
8282 ins_pipe(ialu_mem_imm);
8283 %}
8284
8285 // Store Float
8286 instruct storeF(memory mem, regF src)
8287 %{
8288 match(Set mem (StoreF mem src));
8289
8290 ins_cost(95); // XXX
8291 format %{ "movss $mem, $src\t# float" %}
8292 ins_encode %{
8293 __ movflt($mem$$Address, $src$$XMMRegister);
8294 %}
8295 ins_pipe(pipe_slow); // XXX
8296 %}
8297
8298 // Store immediate Float value (it is faster than store from XMM register)
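// The raw IEEE-754 bit pattern is written with an integer move: from r12
// when the heap-base register is known to hold zero, otherwise as a
// jint_cast immediate.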
8299 instruct storeF0(memory mem, immF0 zero)
8300 %{
8301 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8302 match(Set mem (StoreF mem zero));
8303
8304 ins_cost(25); // XXX
8305 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8306 ins_encode %{
8307 __ movl($mem$$Address, r12);
8308 %}
8309 ins_pipe(ialu_mem_reg);
8310 %}
8311
8312 instruct storeF_imm(memory mem, immF src)
8313 %{
8314 match(Set mem (StoreF mem src));
8315
8316 ins_cost(50);
8317 format %{ "movl $mem, $src\t# float" %}
8318 ins_encode %{
8319 __ movl($mem$$Address, jint_cast($src$$constant));
8320 %}
8321 ins_pipe(ialu_mem_imm);
8322 %}
8323
8324 // Store Double
8325 instruct storeD(memory mem, regD src)
8326 %{
8327 match(Set mem (StoreD mem src));
8328
8329 ins_cost(95); // XXX
8330 format %{ "movsd $mem, $src\t# double" %}
8331 ins_encode %{
8332 __ movdbl($mem$$Address, $src$$XMMRegister);
8333 %}
8334 ins_pipe(pipe_slow); // XXX
8335 %}
8336
8337 // Store immediate double 0.0 (it is faster than store from XMM register)
8338 instruct storeD0_imm(memory mem, immD0 src)
8339 %{
8340 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8341 match(Set mem (StoreD mem src));
8342
8343 ins_cost(50);
8344 format %{ "movq $mem, $src\t# double 0." %}
8345 ins_encode %{
8346 __ movq($mem$$Address, $src$$constant);
8347 %}
8348 ins_pipe(ialu_mem_imm);
8349 %}
8350
8351 instruct storeD0(memory mem, immD0 zero)
8352 %{
8353 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8354 match(Set mem (StoreD mem zero));
8355
8356 ins_cost(25); // XXX
8357 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8358 ins_encode %{
8359 __ movq($mem$$Address, r12);
8360 %}
8361 ins_pipe(ialu_mem_reg);
8362 %}
8363
8364 instruct storeSSI(stackSlotI dst, rRegI src)
8365 %{
8366 match(Set dst src);
8367
8368 ins_cost(100);
8369 format %{ "movl $dst, $src\t# int stk" %}
8370 ins_encode %{
8371 __ movl($dst$$Address, $src$$Register);
8372 %}
8373 ins_pipe( ialu_mem_reg );
8374 %}
8375
8376 instruct storeSSL(stackSlotL dst, rRegL src)
8377 %{
8378 match(Set dst src);
8379
8380 ins_cost(100);
8381 format %{ "movq $dst, $src\t# long stk" %}
8382 ins_encode %{
8383 __ movq($dst$$Address, $src$$Register);
8384 %}
8385 ins_pipe(ialu_mem_reg);
8386 %}
8387
8388 instruct storeSSP(stackSlotP dst, rRegP src)
8389 %{
8390 match(Set dst src);
8391
8392 ins_cost(100);
8393 format %{ "movq $dst, $src\t# ptr stk" %}
8394 ins_encode %{
8395 __ movq($dst$$Address, $src$$Register);
8396 %}
8397 ins_pipe(ialu_mem_reg);
8398 %}
8399
8400 instruct storeSSF(stackSlotF dst, regF src)
8401 %{
8402 match(Set dst src);
8403
8404 ins_cost(95); // XXX
8405 format %{ "movss $dst, $src\t# float stk" %}
8406 ins_encode %{
8407 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8408 %}
8409 ins_pipe(pipe_slow); // XXX
8410 %}
8411
8412 instruct storeSSD(stackSlotD dst, regD src)
8413 %{
8414 match(Set dst src);
8415
8416 ins_cost(95); // XXX
8417 format %{ "movsd $dst, $src\t# double stk" %}
8418 ins_encode %{
8419 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8420 %}
8421 ins_pipe(pipe_slow); // XXX
8422 %}
8423
8424 instruct cacheWB(indirect addr)
8425 %{
8426 predicate(VM_Version::supports_data_cache_line_flush());
8427 match(CacheWB addr);
8428
8429 ins_cost(100);
8430 format %{"cache wb $addr" %}
8431 ins_encode %{
8432 assert($addr->index_position() < 0, "should be");
8433 assert($addr$$disp == 0, "should be");
8434 __ cache_wb(Address($addr$$base$$Register, 0));
8435 %}
8436 ins_pipe(pipe_slow); // XXX
8437 %}
8438
8439 instruct cacheWBPreSync()
8440 %{
8441 predicate(VM_Version::supports_data_cache_line_flush());
8442 match(CacheWBPreSync);
8443
8444 ins_cost(100);
8445 format %{"cache wb presync" %}
8446 ins_encode %{
8447 __ cache_wbsync(true);
8448 %}
8449 ins_pipe(pipe_slow); // XXX
8450 %}
8451
8452 instruct cacheWBPostSync()
8453 %{
8454 predicate(VM_Version::supports_data_cache_line_flush());
8455 match(CacheWBPostSync);
8456
8457 ins_cost(100);
8458 format %{"cache wb postsync" %}
8459 ins_encode %{
8460 __ cache_wbsync(false);
8461 %}
8462 ins_pipe(pipe_slow); // XXX
8463 %}
8464
8465 //----------BSWAP Instructions-------------------------------------------------
8466 instruct bytes_reverse_int(rRegI dst) %{
8467 match(Set dst (ReverseBytesI dst));
8468
8469 format %{ "bswapl $dst" %}
8470 ins_encode %{
8471 __ bswapl($dst$$Register);
8472 %}
8473 ins_pipe( ialu_reg );
8474 %}
8475
8476 instruct bytes_reverse_long(rRegL dst) %{
8477 match(Set dst (ReverseBytesL dst));
8478
8479 format %{ "bswapq $dst" %}
8480 ins_encode %{
8481 __ bswapq($dst$$Register);
8482 %}
8483 ins_pipe( ialu_reg);
8484 %}
8485
8486 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8487 match(Set dst (ReverseBytesUS dst));
8488 effect(KILL cr);
8489
8490 format %{ "bswapl $dst\n\t"
8491 "shrl $dst,16\n\t" %}
8492 ins_encode %{
8493 __ bswapl($dst$$Register);
8494 __ shrl($dst$$Register, 16);
8495 %}
8496 ins_pipe( ialu_reg );
8497 %}
8498
8499 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8500 match(Set dst (ReverseBytesS dst));
8501 effect(KILL cr);
8502
8503 format %{ "bswapl $dst\n\t"
8504 "sar $dst,16\n\t" %}
8505 ins_encode %{
8506 __ bswapl($dst$$Register);
8507 __ sarl($dst$$Register, 16);
8508 %}
8509 ins_pipe( ialu_reg );
8510 %}
8511
8512 //---------- Zeros Count Instructions ------------------------------------------
8513
8514 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8515 predicate(UseCountLeadingZerosInstruction);
8516 match(Set dst (CountLeadingZerosI src));
8517 effect(KILL cr);
8518
8519 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8520 ins_encode %{
8521 __ lzcntl($dst$$Register, $src$$Register);
8522 %}
8523 ins_pipe(ialu_reg);
8524 %}
8525
8526 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8527 predicate(UseCountLeadingZerosInstruction);
8528 match(Set dst (CountLeadingZerosI (LoadI src)));
8529 effect(KILL cr);
8530 ins_cost(175);
8531 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8532 ins_encode %{
8533 __ lzcntl($dst$$Register, $src$$Address);
8534 %}
8535 ins_pipe(ialu_reg_mem);
8536 %}
8537
8538 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8539 predicate(!UseCountLeadingZerosInstruction);
8540 match(Set dst (CountLeadingZerosI src));
8541 effect(KILL cr);
8542
8543 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8544 "jnz skip\n\t"
8545 "movl $dst, -1\n"
8546 "skip:\n\t"
8547 "negl $dst\n\t"
8548 "addl $dst, 31" %}
8549 ins_encode %{
8550 Register Rdst = $dst$$Register;
8551 Register Rsrc = $src$$Register;
8552 Label skip;
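// CLZ(x) = 31 - BSR(x), computed below as negate-then-add. BSR with a
// zero input sets ZF and leaves its destination undefined, so -1 is
// substituted to make the fallback result 31 - (-1) = 32.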
8553 __ bsrl(Rdst, Rsrc);
8554 __ jccb(Assembler::notZero, skip);
8555 __ movl(Rdst, -1);
8556 __ bind(skip);
8557 __ negl(Rdst);
8558 __ addl(Rdst, BitsPerInt - 1);
8559 %}
8560 ins_pipe(ialu_reg);
8561 %}
8562
8563 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8564 predicate(UseCountLeadingZerosInstruction);
8565 match(Set dst (CountLeadingZerosL src));
8566 effect(KILL cr);
8567
8568 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8569 ins_encode %{
8570 __ lzcntq($dst$$Register, $src$$Register);
8571 %}
8572 ins_pipe(ialu_reg);
8573 %}
8574
8575 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8576 predicate(UseCountLeadingZerosInstruction);
8577 match(Set dst (CountLeadingZerosL (LoadL src)));
8578 effect(KILL cr);
8579 ins_cost(175);
8580 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8581 ins_encode %{
8582 __ lzcntq($dst$$Register, $src$$Address);
8583 %}
8584 ins_pipe(ialu_reg_mem);
8585 %}
8586
8587 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8588 predicate(!UseCountLeadingZerosInstruction);
8589 match(Set dst (CountLeadingZerosL src));
8590 effect(KILL cr);
8591
8592 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8593 "jnz skip\n\t"
8594 "movl $dst, -1\n"
8595 "skip:\n\t"
8596 "negl $dst\n\t"
8597 "addl $dst, 63" %}
8598 ins_encode %{
8599 Register Rdst = $dst$$Register;
8600 Register Rsrc = $src$$Register;
8601 Label skip;
8602 __ bsrq(Rdst, Rsrc);
8603 __ jccb(Assembler::notZero, skip);
8604 __ movl(Rdst, -1);
8605 __ bind(skip);
8606 __ negl(Rdst);
8607 __ addl(Rdst, BitsPerLong - 1);
8608 %}
8609 ins_pipe(ialu_reg);
8610 %}
8611
8612 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8613 predicate(UseCountTrailingZerosInstruction);
8614 match(Set dst (CountTrailingZerosI src));
8615 effect(KILL cr);
8616
8617 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8618 ins_encode %{
8619 __ tzcntl($dst$$Register, $src$$Register);
8620 %}
8621 ins_pipe(ialu_reg);
8622 %}
8623
8624 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8625 predicate(UseCountTrailingZerosInstruction);
8626 match(Set dst (CountTrailingZerosI (LoadI src)));
8627 effect(KILL cr);
8628 ins_cost(175);
8629 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8630 ins_encode %{
8631 __ tzcntl($dst$$Register, $src$$Address);
8632 %}
8633 ins_pipe(ialu_reg_mem);
8634 %}
8635
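// bsf likewise leaves dst undefined (with ZF set) for a zero input, so the zero
// case is patched to 32 afterwards (64 in the long variant).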
8636 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8637 predicate(!UseCountTrailingZerosInstruction);
8638 match(Set dst (CountTrailingZerosI src));
8639 effect(KILL cr);
8640
8641 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8642 "jnz done\n\t"
8643 "movl $dst, 32\n"
8644 "done:" %}
8645 ins_encode %{
8646 Register Rdst = $dst$$Register;
8647 Label done;
8648 __ bsfl(Rdst, $src$$Register);
8649 __ jccb(Assembler::notZero, done);
8650 __ movl(Rdst, BitsPerInt);
8651 __ bind(done);
8652 %}
8653 ins_pipe(ialu_reg);
8654 %}
8655
8656 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8657 predicate(UseCountTrailingZerosInstruction);
8658 match(Set dst (CountTrailingZerosL src));
8659 effect(KILL cr);
8660
8661 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8662 ins_encode %{
8663 __ tzcntq($dst$$Register, $src$$Register);
8664 %}
8665 ins_pipe(ialu_reg);
8666 %}
8667
8668 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8669 predicate(UseCountTrailingZerosInstruction);
8670 match(Set dst (CountTrailingZerosL (LoadL src)));
8671 effect(KILL cr);
8672 ins_cost(175);
8673 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8674 ins_encode %{
8675 __ tzcntq($dst$$Register, $src$$Address);
8676 %}
8677 ins_pipe(ialu_reg_mem);
8678 %}
8679
8680 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8681 predicate(!UseCountTrailingZerosInstruction);
8682 match(Set dst (CountTrailingZerosL src));
8683 effect(KILL cr);
8684
8685 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8686 "jnz done\n\t"
8687 "movl $dst, 64\n"
8688 "done:" %}
8689 ins_encode %{
8690 Register Rdst = $dst$$Register;
8691 Label done;
8692 __ bsfq(Rdst, $src$$Register);
8693 __ jccb(Assembler::notZero, done);
8694 __ movl(Rdst, BitsPerLong);
8695 __ bind(done);
8696 %}
8697 ins_pipe(ialu_reg);
8698 %}
8699
8700 //--------------- Reverse Operation Instructions ----------------
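// ReverseI/ReverseL reverse the bit order of a value. With GFNI, the bits of each
// byte can be reversed in a single affine step (GF2P8AFFINEQB) before a byte swap;
// without it, reverseI/reverseL fall back to scalar shift-and-mask swaps in GPRs.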
8701 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8702 predicate(!VM_Version::supports_gfni());
8703 match(Set dst (ReverseI src));
8704 effect(TEMP dst, TEMP rtmp, KILL cr);
8705 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8706 ins_encode %{
8707 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8708 %}
8709 ins_pipe( ialu_reg );
8710 %}
8711
8712 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8713 predicate(VM_Version::supports_gfni());
8714 match(Set dst (ReverseI src));
8715 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8716 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8717 ins_encode %{
8718 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8719 %}
8720 ins_pipe( ialu_reg );
8721 %}
8722
8723 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8724 predicate(!VM_Version::supports_gfni());
8725 match(Set dst (ReverseL src));
8726 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8727 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8728 ins_encode %{
8729 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8730 %}
8731 ins_pipe( ialu_reg );
8732 %}
8733
8734 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8735 predicate(VM_Version::supports_gfni());
8736 match(Set dst (ReverseL src));
8737 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8738 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8739 ins_encode %{
8740 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8741 %}
8742 ins_pipe( ialu_reg );
8743 %}
8744
8745 //---------- Population Count Instructions -------------------------------------
8746
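// UsePopCountInstruction is set only when the POPCNT CPU feature is available.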
8747 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8748 predicate(UsePopCountInstruction);
8749 match(Set dst (PopCountI src));
8750 effect(KILL cr);
8751
8752 format %{ "popcnt $dst, $src" %}
8753 ins_encode %{
8754 __ popcntl($dst$$Register, $src$$Register);
8755 %}
8756 ins_pipe(ialu_reg);
8757 %}
8758
8759 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8760 predicate(UsePopCountInstruction);
8761 match(Set dst (PopCountI (LoadI mem)));
8762 effect(KILL cr);
8763
8764 format %{ "popcnt $dst, $mem" %}
8765 ins_encode %{
8766 __ popcntl($dst$$Register, $mem$$Address);
8767 %}
8768 ins_pipe(ialu_reg);
8769 %}
8770
8771 // Note: Long.bitCount(long) returns an int.
8772 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8773 predicate(UsePopCountInstruction);
8774 match(Set dst (PopCountL src));
8775 effect(KILL cr);
8776
8777 format %{ "popcnt $dst, $src" %}
8778 ins_encode %{
8779 __ popcntq($dst$$Register, $src$$Register);
8780 %}
8781 ins_pipe(ialu_reg);
8782 %}
8783
8784 // Note: Long.bitCount(long) returns an int.
8785 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8786 predicate(UsePopCountInstruction);
8787 match(Set dst (PopCountL (LoadL mem)));
8788 effect(KILL cr);
8789
8790 format %{ "popcnt $dst, $mem" %}
8791 ins_encode %{
8792 __ popcntq($dst$$Register, $mem$$Address);
8793 %}
8794 ins_pipe(ialu_reg);
8795 %}
8796
8797
8798 //----------MemBar Instructions-----------------------------------------------
8799 // Memory barrier flavors
8800
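// x86-64 is TSO (total store order): loads have acquire and stores have release
// semantics, so acquire, release, and storestore barriers need no code. Only
// StoreLoad ordering requires a real fence, emitted below as "lock addl [rsp], 0".
// unnecessary_membar_volatile elides even that when Matcher::post_store_load_barrier
// proves a preceding serializing instruction makes the fence redundant.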
8801 instruct membar_acquire()
8802 %{
8803 match(MemBarAcquire);
8804 match(LoadFence);
8805 ins_cost(0);
8806
8807 size(0);
8808 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8809 ins_encode();
8810 ins_pipe(empty);
8811 %}
8812
8813 instruct membar_acquire_lock()
8814 %{
8815 match(MemBarAcquireLock);
8816 ins_cost(0);
8817
8818 size(0);
8819 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8820 ins_encode();
8821 ins_pipe(empty);
8822 %}
8823
8824 instruct membar_release()
8825 %{
8826 match(MemBarRelease);
8827 match(StoreFence);
8828 ins_cost(0);
8829
8830 size(0);
8831 format %{ "MEMBAR-release ! (empty encoding)" %}
8832 ins_encode();
8833 ins_pipe(empty);
8834 %}
8835
8836 instruct membar_release_lock()
8837 %{
8838 match(MemBarReleaseLock);
8839 ins_cost(0);
8840
8841 size(0);
8842 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8843 ins_encode();
8844 ins_pipe(empty);
8845 %}
8846
8847 instruct membar_volatile(rFlagsReg cr) %{
8848 match(MemBarVolatile);
8849 effect(KILL cr);
8850 ins_cost(400);
8851
8852 format %{
8853 $$template
8854 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8855 %}
8856 ins_encode %{
8857 __ membar(Assembler::StoreLoad);
8858 %}
8859 ins_pipe(pipe_slow);
8860 %}
8861
8862 instruct unnecessary_membar_volatile()
8863 %{
8864 match(MemBarVolatile);
8865 predicate(Matcher::post_store_load_barrier(n));
8866 ins_cost(0);
8867
8868 size(0);
8869 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8870 ins_encode();
8871 ins_pipe(empty);
8872 %}
8873
8874 instruct membar_storestore() %{
8875 match(MemBarStoreStore);
8876 match(StoreStoreFence);
8877 ins_cost(0);
8878
8879 size(0);
8880 format %{ "MEMBAR-storestore (empty encoding)" %}
8881 ins_encode( );
8882 ins_pipe(empty);
8883 %}
8884
8885 //----------Move Instructions--------------------------------------------------
8886
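// The cast instructions below reinterpret a value in place; they emit a move only
// when the source and destination registers differ, and are otherwise free.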
8887 instruct castX2P(rRegP dst, rRegL src)
8888 %{
8889 match(Set dst (CastX2P src));
8890
8891 format %{ "movq $dst, $src\t# long->ptr" %}
8892 ins_encode %{
8893 if ($dst$$reg != $src$$reg) {
8894 __ movptr($dst$$Register, $src$$Register);
8895 }
8896 %}
8897 ins_pipe(ialu_reg_reg); // XXX
8898 %}
8899
8900 instruct castI2N(rRegN dst, rRegI src)
8901 %{
8902 match(Set dst (CastI2N src));
8903
8904 format %{ "movq $dst, $src\t# int -> narrow ptr" %}
8905 ins_encode %{
8906 if ($dst$$reg != $src$$reg) {
8907 __ movl($dst$$Register, $src$$Register);
8908 }
8909 %}
8910 ins_pipe(ialu_reg_reg); // XXX
8911 %}
8912
8913 instruct castN2X(rRegL dst, rRegN src)
8914 %{
8915 match(Set dst (CastP2X src));
8916
8917 format %{ "movq $dst, $src\t# ptr -> long" %}
8918 ins_encode %{
8919 if ($dst$$reg != $src$$reg) {
8920 __ movptr($dst$$Register, $src$$Register);
8921 }
8922 %}
8923 ins_pipe(ialu_reg_reg); // XXX
8924 %}
8925
8926 instruct castP2X(rRegL dst, rRegP src)
8927 %{
8928 match(Set dst (CastP2X src));
8929
8930 format %{ "movq $dst, $src\t# ptr -> long" %}
8931 ins_encode %{
8932 if ($dst$$reg != $src$$reg) {
8933 __ movptr($dst$$Register, $src$$Register);
8934 }
8935 %}
8936 ins_pipe(ialu_reg_reg); // XXX
8937 %}
8938
// Convert an oop into an int for vector alignment masking
8940 instruct convP2I(rRegI dst, rRegP src)
8941 %{
8942 match(Set dst (ConvL2I (CastP2X src)));
8943
8944 format %{ "movl $dst, $src\t# ptr -> int" %}
8945 ins_encode %{
8946 __ movl($dst$$Register, $src$$Register);
8947 %}
8948 ins_pipe(ialu_reg_reg); // XXX
8949 %}
8950
// Convert a compressed oop into an int for vector alignment masking
// when oops are 32-bit (heap < 4Gb).
8953 instruct convN2I(rRegI dst, rRegN src)
8954 %{
8955 predicate(CompressedOops::shift() == 0);
8956 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8957
8958 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8959 ins_encode %{
8960 __ movl($dst$$Register, $src$$Register);
8961 %}
8962 ins_pipe(ialu_reg_reg); // XXX
8963 %}
8964
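// Compressed oops: encoding compresses a 64-bit oop to 32 bits by subtracting the
// heap base and/or shifting, depending on heap size and placement; decoding is the
// inverse. The _not_null variants skip the null handling and are selected when the
// compiler can prove the pointer is never null.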
8965 // Convert oop pointer into compressed form
8966 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8967 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8968 match(Set dst (EncodeP src));
8969 effect(KILL cr);
8970 format %{ "encode_heap_oop $dst,$src" %}
8971 ins_encode %{
8972 Register s = $src$$Register;
8973 Register d = $dst$$Register;
8974 if (s != d) {
8975 __ movq(d, s);
8976 }
8977 __ encode_heap_oop(d);
8978 %}
8979 ins_pipe(ialu_reg_long);
8980 %}
8981
8982 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8983 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8984 match(Set dst (EncodeP src));
8985 effect(KILL cr);
8986 format %{ "encode_heap_oop_not_null $dst,$src" %}
8987 ins_encode %{
8988 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8989 %}
8990 ins_pipe(ialu_reg_long);
8991 %}
8992
8993 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8994 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8995 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8996 match(Set dst (DecodeN src));
8997 effect(KILL cr);
8998 format %{ "decode_heap_oop $dst,$src" %}
8999 ins_encode %{
9000 Register s = $src$$Register;
9001 Register d = $dst$$Register;
9002 if (s != d) {
9003 __ movq(d, s);
9004 }
9005 __ decode_heap_oop(d);
9006 %}
9007 ins_pipe(ialu_reg_long);
9008 %}
9009
9010 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9011 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9012 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9013 match(Set dst (DecodeN src));
9014 effect(KILL cr);
9015 format %{ "decode_heap_oop_not_null $dst,$src" %}
9016 ins_encode %{
9017 Register s = $src$$Register;
9018 Register d = $dst$$Register;
9019 if (s != d) {
9020 __ decode_heap_oop_not_null(d, s);
9021 } else {
9022 __ decode_heap_oop_not_null(d);
9023 }
9024 %}
9025 ins_pipe(ialu_reg_long);
9026 %}
9027
9028 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9029 match(Set dst (EncodePKlass src));
9030 effect(TEMP dst, KILL cr);
9031 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9032 ins_encode %{
9033 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9034 %}
9035 ins_pipe(ialu_reg_long);
9036 %}
9037
9038 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9039 match(Set dst (DecodeNKlass src));
9040 effect(TEMP dst, KILL cr);
9041 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9042 ins_encode %{
9043 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9044 %}
9045 ins_pipe(ialu_reg_long);
9046 %}
9047
9048 //----------Conditional Move---------------------------------------------------
9049 // Jump
9050 // dummy instruction for generating temp registers
9051 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9052 match(Jump (LShiftL switch_val shift));
9053 ins_cost(350);
9054 predicate(false);
9055 effect(TEMP dest);
9056
9057 format %{ "leaq $dest, [$constantaddress]\n\t"
9058 "jmp [$dest + $switch_val << $shift]\n\t" %}
9059 ins_encode %{
// We could use jump(ArrayAddress), but the macro assembler needs r10 for that,
// and the compiler treats r10 as an allocatable register. So we build the
// dispatch address by hand.
9063 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9064 // ArrayAddress dispatch(table, index);
9065 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9066 __ lea($dest$$Register, $constantaddress);
9067 __ jmp(dispatch);
9068 %}
9069 ins_pipe(pipe_jmp);
9070 %}
9071
9072 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9073 match(Jump (AddL (LShiftL switch_val shift) offset));
9074 ins_cost(350);
9075 effect(TEMP dest);
9076
9077 format %{ "leaq $dest, [$constantaddress]\n\t"
9078 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9079 ins_encode %{
// We could use jump(ArrayAddress), but the macro assembler needs r10 for that,
// and the compiler treats r10 as an allocatable register. So we build the
// dispatch address by hand.
9083 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9084 // ArrayAddress dispatch(table, index);
9085 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9086 __ lea($dest$$Register, $constantaddress);
9087 __ jmp(dispatch);
9088 %}
9089 ins_pipe(pipe_jmp);
9090 %}
9091
9092 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9093 match(Jump switch_val);
9094 ins_cost(350);
9095 effect(TEMP dest);
9096
9097 format %{ "leaq $dest, [$constantaddress]\n\t"
9098 "jmp [$dest + $switch_val]\n\t" %}
9099 ins_encode %{
// We could use jump(ArrayAddress), but the macro assembler needs r10 for that,
// and the compiler treats r10 as an allocatable register. So we build the
// dispatch address by hand.
9103 // Address index(noreg, switch_reg, Address::times_1);
9104 // ArrayAddress dispatch(table, index);
9105 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9106 __ lea($dest$$Register, $constantaddress);
9107 __ jmp(dispatch);
9108 %}
9109 ins_pipe(pipe_jmp);
9110 %}
9111
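// A CMoveI selecting between constants 1 and 0 needs no cmov at all: a single
// setcc of the negated condition materializes the result. For general operands,
// the legacy two-operand cmov overwrites dst, while the APX NDD (new data
// destination) forms write a separate destination register, saving the copy a
// two-operand form would otherwise need.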
9112 // Conditional move
9113 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9114 %{
9115 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9116 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9117
9118 ins_cost(100); // XXX
9119 format %{ "setbn$cop $dst\t# signed, int" %}
9120 ins_encode %{
9121 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9122 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9123 %}
9124 ins_pipe(ialu_reg);
9125 %}
9126
9127 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9128 %{
9129 predicate(!UseAPX);
9130 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9131
9132 ins_cost(200); // XXX
9133 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9134 ins_encode %{
9135 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9136 %}
9137 ins_pipe(pipe_cmov_reg);
9138 %}
9139
9140 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9141 %{
9142 predicate(UseAPX);
9143 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9144
9145 ins_cost(200);
9146 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9147 ins_encode %{
9148 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9149 %}
9150 ins_pipe(pipe_cmov_reg);
9151 %}
9152
9153 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9154 %{
9155 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9156 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9157
9158 ins_cost(100); // XXX
9159 format %{ "setbn$cop $dst\t# unsigned, int" %}
9160 ins_encode %{
9161 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9162 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9163 %}
9164 ins_pipe(ialu_reg);
9165 %}
9166
9167 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9168 predicate(!UseAPX);
9169 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9170
9171 ins_cost(200); // XXX
9172 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9173 ins_encode %{
9174 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9175 %}
9176 ins_pipe(pipe_cmov_reg);
9177 %}
9178
9179 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9180 predicate(UseAPX);
9181 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9182
9183 ins_cost(200);
9184 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9185 ins_encode %{
9186 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9187 %}
9188 ins_pipe(pipe_cmov_reg);
9189 %}
9190
9191 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9192 %{
9193 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9194 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9195
9196 ins_cost(100); // XXX
9197 format %{ "setbn$cop $dst\t# unsigned, int" %}
9198 ins_encode %{
9199 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9200 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9201 %}
9202 ins_pipe(ialu_reg);
9203 %}
9204
9205 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9206 predicate(!UseAPX);
9207 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9208 ins_cost(200);
9209 expand %{
9210 cmovI_regU(cop, cr, dst, src);
9211 %}
9212 %}
9213
9214 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9215 predicate(UseAPX);
9216 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9217 ins_cost(200);
9218 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9219 ins_encode %{
9220 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9221 %}
9222 ins_pipe(pipe_cmov_reg);
9223 %}
9224
9225 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9226 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9227 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9228
9229 ins_cost(200); // XXX
9230 format %{ "cmovpl $dst, $src\n\t"
9231 "cmovnel $dst, $src" %}
9232 ins_encode %{
9233 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9234 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9235 %}
9236 ins_pipe(pipe_cmov_reg);
9237 %}
9238
9239 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9240 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9241 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9242 effect(TEMP dst);
9243
9244 ins_cost(200);
9245 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9246 "cmovnel $dst, $src2" %}
9247 ins_encode %{
9248 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9249 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9250 %}
9251 ins_pipe(pipe_cmov_reg);
9252 %}
9253
9254 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9255 // inputs of the CMove
9256 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9257 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9258 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9259 effect(TEMP dst);
9260
9261 ins_cost(200); // XXX
9262 format %{ "cmovpl $dst, $src\n\t"
9263 "cmovnel $dst, $src" %}
9264 ins_encode %{
9265 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9266 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9267 %}
9268 ins_pipe(pipe_cmov_reg);
9269 %}
9270
// This special handling is needed only for eq/ne comparisons: NaN == NaN is false,
// and the parity flag is set if either operand is a NaN.
9273 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9274 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9275 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9276 effect(TEMP dst);
9277
9278 ins_cost(200);
9279 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9280 "cmovnel $dst, $src2" %}
9281 ins_encode %{
9282 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9283 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9284 %}
9285 ins_pipe(pipe_cmov_reg);
9286 %}
9287
9288 // Conditional move
9289 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9290 predicate(!UseAPX);
9291 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9292
9293 ins_cost(250); // XXX
9294 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9295 ins_encode %{
9296 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9297 %}
9298 ins_pipe(pipe_cmov_mem);
9299 %}
9300
9301 // Conditional move
9302 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9303 %{
9304 predicate(UseAPX);
9305 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9306
9307 ins_cost(250);
9308 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9309 ins_encode %{
9310 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9311 %}
9312 ins_pipe(pipe_cmov_mem);
9313 %}
9314
9315 // Conditional move
9316 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9317 %{
9318 predicate(!UseAPX);
9319 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9320
9321 ins_cost(250); // XXX
9322 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9323 ins_encode %{
9324 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9325 %}
9326 ins_pipe(pipe_cmov_mem);
9327 %}
9328
9329 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9330 predicate(!UseAPX);
9331 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9332 ins_cost(250);
9333 expand %{
9334 cmovI_memU(cop, cr, dst, src);
9335 %}
9336 %}
9337
9338 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9339 %{
9340 predicate(UseAPX);
9341 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9342
9343 ins_cost(250);
9344 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9345 ins_encode %{
9346 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9347 %}
9348 ins_pipe(pipe_cmov_mem);
9349 %}
9350
9351 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9352 %{
9353 predicate(UseAPX);
9354 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9355 ins_cost(250);
9356 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9357 ins_encode %{
9358 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9359 %}
9360 ins_pipe(pipe_cmov_mem);
9361 %}
9362
9363 // Conditional move
9364 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9365 %{
9366 predicate(!UseAPX);
9367 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9368
9369 ins_cost(200); // XXX
9370 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9371 ins_encode %{
9372 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9373 %}
9374 ins_pipe(pipe_cmov_reg);
9375 %}
9376
9377 // Conditional move ndd
9378 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9379 %{
9380 predicate(UseAPX);
9381 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9382
9383 ins_cost(200);
9384 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9385 ins_encode %{
9386 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9387 %}
9388 ins_pipe(pipe_cmov_reg);
9389 %}
9390
9391 // Conditional move
9392 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9393 %{
9394 predicate(!UseAPX);
9395 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9396
9397 ins_cost(200); // XXX
9398 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9399 ins_encode %{
9400 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9401 %}
9402 ins_pipe(pipe_cmov_reg);
9403 %}
9404
9405 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9406 predicate(!UseAPX);
9407 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9408 ins_cost(200);
9409 expand %{
9410 cmovN_regU(cop, cr, dst, src);
9411 %}
9412 %}
9413
9414 // Conditional move ndd
9415 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9416 %{
9417 predicate(UseAPX);
9418 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9419
9420 ins_cost(200);
9421 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9422 ins_encode %{
9423 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9424 %}
9425 ins_pipe(pipe_cmov_reg);
9426 %}
9427
9428 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9429 predicate(UseAPX);
9430 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9431 ins_cost(200);
9432 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9433 ins_encode %{
9434 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9435 %}
9436 ins_pipe(pipe_cmov_reg);
9437 %}
9438
9439 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9440 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9441 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9442
9443 ins_cost(200); // XXX
9444 format %{ "cmovpl $dst, $src\n\t"
9445 "cmovnel $dst, $src" %}
9446 ins_encode %{
9447 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9448 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9449 %}
9450 ins_pipe(pipe_cmov_reg);
9451 %}
9452
9453 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9454 // inputs of the CMove
9455 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9456 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9457 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9458
9459 ins_cost(200); // XXX
9460 format %{ "cmovpl $dst, $src\n\t"
9461 "cmovnel $dst, $src" %}
9462 ins_encode %{
9463 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9464 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9465 %}
9466 ins_pipe(pipe_cmov_reg);
9467 %}
9468
9469 // Conditional move
9470 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9471 %{
9472 predicate(!UseAPX);
9473 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9474
9475 ins_cost(200); // XXX
9476 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9477 ins_encode %{
9478 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9479 %}
9480 ins_pipe(pipe_cmov_reg); // XXX
9481 %}
9482
9483 // Conditional move ndd
9484 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9485 %{
9486 predicate(UseAPX);
9487 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9488
9489 ins_cost(200);
9490 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9491 ins_encode %{
9492 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9493 %}
9494 ins_pipe(pipe_cmov_reg);
9495 %}
9496
9497 // Conditional move
9498 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9499 %{
9500 predicate(!UseAPX);
9501 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9502
9503 ins_cost(200); // XXX
9504 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9505 ins_encode %{
9506 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9507 %}
9508 ins_pipe(pipe_cmov_reg); // XXX
9509 %}
9510
9511 // Conditional move ndd
9512 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9513 %{
9514 predicate(UseAPX);
9515 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9516
9517 ins_cost(200);
9518 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9519 ins_encode %{
9520 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9521 %}
9522 ins_pipe(pipe_cmov_reg);
9523 %}
9524
9525 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9526 predicate(!UseAPX);
9527 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9528 ins_cost(200);
9529 expand %{
9530 cmovP_regU(cop, cr, dst, src);
9531 %}
9532 %}
9533
9534 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9535 predicate(UseAPX);
9536 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9537 ins_cost(200);
9538 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9539 ins_encode %{
9540 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9541 %}
9542 ins_pipe(pipe_cmov_reg);
9543 %}
9544
9545 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9546 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9547 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9548
9549 ins_cost(200); // XXX
9550 format %{ "cmovpq $dst, $src\n\t"
9551 "cmovneq $dst, $src" %}
9552 ins_encode %{
9553 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9554 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9555 %}
9556 ins_pipe(pipe_cmov_reg);
9557 %}
9558
9559 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9560 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9561 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9562 effect(TEMP dst);
9563
9564 ins_cost(200);
9565 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9566 "cmovneq $dst, $src2" %}
9567 ins_encode %{
9568 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9569 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9570 %}
9571 ins_pipe(pipe_cmov_reg);
9572 %}
9573
9574 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9575 // inputs of the CMove
9576 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9577 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9578 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9579
9580 ins_cost(200); // XXX
9581 format %{ "cmovpq $dst, $src\n\t"
9582 "cmovneq $dst, $src" %}
9583 ins_encode %{
9584 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9585 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9586 %}
9587 ins_pipe(pipe_cmov_reg);
9588 %}
9589
9590 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9591 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9592 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9593 effect(TEMP dst);
9594
9595 ins_cost(200);
9596 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9597 "cmovneq $dst, $src2" %}
9598 ins_encode %{
9599 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9600 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9601 %}
9602 ins_pipe(pipe_cmov_reg);
9603 %}
9604
9605 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9606 %{
9607 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9608 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9609
9610 ins_cost(100); // XXX
9611 format %{ "setbn$cop $dst\t# signed, long" %}
9612 ins_encode %{
9613 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9614 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9615 %}
9616 ins_pipe(ialu_reg);
9617 %}
9618
9619 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9620 %{
9621 predicate(!UseAPX);
9622 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9623
9624 ins_cost(200); // XXX
9625 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9626 ins_encode %{
9627 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9628 %}
9629 ins_pipe(pipe_cmov_reg); // XXX
9630 %}
9631
9632 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9633 %{
9634 predicate(UseAPX);
9635 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9636
9637 ins_cost(200);
9638 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9639 ins_encode %{
9640 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9641 %}
9642 ins_pipe(pipe_cmov_reg);
9643 %}
9644
9645 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9646 %{
9647 predicate(!UseAPX);
9648 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9649
9650 ins_cost(200); // XXX
9651 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9652 ins_encode %{
9653 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9654 %}
9655 ins_pipe(pipe_cmov_mem); // XXX
9656 %}
9657
9658 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9659 %{
9660 predicate(UseAPX);
9661 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9662
9663 ins_cost(200);
9664 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9665 ins_encode %{
9666 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9667 %}
9668 ins_pipe(pipe_cmov_mem);
9669 %}
9670
9671 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9672 %{
9673 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9674 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9675
9676 ins_cost(100); // XXX
9677 format %{ "setbn$cop $dst\t# unsigned, long" %}
9678 ins_encode %{
9679 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9680 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9681 %}
9682 ins_pipe(ialu_reg);
9683 %}
9684
9685 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9686 %{
9687 predicate(!UseAPX);
9688 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9689
9690 ins_cost(200); // XXX
9691 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9692 ins_encode %{
9693 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9694 %}
9695 ins_pipe(pipe_cmov_reg); // XXX
9696 %}
9697
9698 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9699 %{
9700 predicate(UseAPX);
9701 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9702
9703 ins_cost(200);
9704 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9705 ins_encode %{
9706 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9707 %}
9708 ins_pipe(pipe_cmov_reg);
9709 %}
9710
9711 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9712 %{
9713 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9714 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9715
9716 ins_cost(100); // XXX
9717 format %{ "setbn$cop $dst\t# unsigned, long" %}
9718 ins_encode %{
9719 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9720 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9721 %}
9722 ins_pipe(ialu_reg);
9723 %}
9724
9725 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9726 predicate(!UseAPX);
9727 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9728 ins_cost(200);
9729 expand %{
9730 cmovL_regU(cop, cr, dst, src);
9731 %}
9732 %}
9733
9734 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9735 %{
9736 predicate(UseAPX);
9737 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9738 ins_cost(200);
9739 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9740 ins_encode %{
9741 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9742 %}
9743 ins_pipe(pipe_cmov_reg);
9744 %}
9745
9746 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9747 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9748 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9749
9750 ins_cost(200); // XXX
9751 format %{ "cmovpq $dst, $src\n\t"
9752 "cmovneq $dst, $src" %}
9753 ins_encode %{
9754 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9755 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9756 %}
9757 ins_pipe(pipe_cmov_reg);
9758 %}
9759
9760 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9761 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9762 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9763 effect(TEMP dst);
9764
9765 ins_cost(200);
9766 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9767 "cmovneq $dst, $src2" %}
9768 ins_encode %{
9769 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9770 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9771 %}
9772 ins_pipe(pipe_cmov_reg);
9773 %}
9774
9775 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9776 // inputs of the CMove
9777 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9778 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9779 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9780
9781 ins_cost(200); // XXX
9782 format %{ "cmovpq $dst, $src\n\t"
9783 "cmovneq $dst, $src" %}
9784 ins_encode %{
9785 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9786 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9787 %}
9788 ins_pipe(pipe_cmov_reg);
9789 %}
9790
9791 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9792 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9793 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9794 effect(TEMP dst);
9795
9796 ins_cost(200);
9797 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9798 "cmovneq $dst, $src2" %}
9799 ins_encode %{
9800 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9801 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9802 %}
9803 ins_pipe(pipe_cmov_reg);
9804 %}
9805
9806 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9807 %{
9808 predicate(!UseAPX);
9809 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9810
9811 ins_cost(200); // XXX
9812 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9813 ins_encode %{
9814 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9815 %}
9816 ins_pipe(pipe_cmov_mem); // XXX
9817 %}
9818
9819 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9820 predicate(!UseAPX);
9821 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9822 ins_cost(200);
9823 expand %{
9824 cmovL_memU(cop, cr, dst, src);
9825 %}
9826 %}
9827
9828 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9829 %{
9830 predicate(UseAPX);
9831 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9832
9833 ins_cost(200);
9834 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9835 ins_encode %{
9836 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9837 %}
9838 ins_pipe(pipe_cmov_mem);
9839 %}
9840
9841 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9842 %{
9843 predicate(UseAPX);
9844 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9845 ins_cost(200);
9846 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9847 ins_encode %{
9848 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9849 %}
9850 ins_pipe(pipe_cmov_mem);
9851 %}
9852
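// There is no floating-point cmov, so CMoveF/CMoveD is a conditional branch around
// a register-to-register move, taken on the inverse of the cmov condition.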
9853 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9854 %{
9855 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9856
9857 ins_cost(200); // XXX
9858 format %{ "jn$cop skip\t# signed cmove float\n\t"
9859 "movss $dst, $src\n"
9860 "skip:" %}
9861 ins_encode %{
9862 Label Lskip;
9863 // Invert sense of branch from sense of CMOV
9864 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9865 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9866 __ bind(Lskip);
9867 %}
9868 ins_pipe(pipe_slow);
9869 %}
9870
9871 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9872 %{
9873 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9874
9875 ins_cost(200); // XXX
9876 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9877 "movss $dst, $src\n"
9878 "skip:" %}
9879 ins_encode %{
9880 Label Lskip;
9881 // Invert sense of branch from sense of CMOV
9882 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9883 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9884 __ bind(Lskip);
9885 %}
9886 ins_pipe(pipe_slow);
9887 %}
9888
9889 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9890 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9891 ins_cost(200);
9892 expand %{
9893 cmovF_regU(cop, cr, dst, src);
9894 %}
9895 %}
9896
9897 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9898 %{
9899 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9900
9901 ins_cost(200); // XXX
9902 format %{ "jn$cop skip\t# signed cmove double\n\t"
9903 "movsd $dst, $src\n"
9904 "skip:" %}
9905 ins_encode %{
9906 Label Lskip;
9907 // Invert sense of branch from sense of CMOV
9908 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9909 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9910 __ bind(Lskip);
9911 %}
9912 ins_pipe(pipe_slow);
9913 %}
9914
9915 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9916 %{
9917 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9918
9919 ins_cost(200); // XXX
9920 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9921 "movsd $dst, $src\n"
9922 "skip:" %}
9923 ins_encode %{
9924 Label Lskip;
9925 // Invert sense of branch from sense of CMOV
9926 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9927 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9928 __ bind(Lskip);
9929 %}
9930 ins_pipe(pipe_slow);
9931 %}
9932
9933 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9934 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9935 ins_cost(200);
9936 expand %{
9937 cmovD_regU(cop, cr, dst, src);
9938 %}
9939 %}
9940
9941 //----------Arithmetic Instructions--------------------------------------------
9942 //----------Addition Instructions----------------------------------------------
9943
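// Without APX, the legacy two-operand forms (dst += src) are used; with APX, the
// NDD forms write a separate destination. The flag(...) annotations record which
// condition codes each instruction defines, and Flag_ndd_demotable_opr* appears to
// mark NDD forms that can fall back to the shorter legacy encoding when dst
// aliases that operand.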
9944 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9945 %{
9946 predicate(!UseAPX);
9947 match(Set dst (AddI dst src));
9948 effect(KILL cr);
9949 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9950 format %{ "addl $dst, $src\t# int" %}
9951 ins_encode %{
9952 __ addl($dst$$Register, $src$$Register);
9953 %}
9954 ins_pipe(ialu_reg_reg);
9955 %}
9956
9957 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9958 %{
9959 predicate(UseAPX);
9960 match(Set dst (AddI src1 src2));
9961 effect(KILL cr);
9962 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9963
9964 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9965 ins_encode %{
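// The trailing 'false' is the no_flags argument: it selects the flag-setting
// encoding rather than the APX NF (no-flags) form, as the KILL cr effect requires.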
9966 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9967 %}
9968 ins_pipe(ialu_reg_reg);
9969 %}
9970
9971 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9972 %{
9973 predicate(!UseAPX);
9974 match(Set dst (AddI dst src));
9975 effect(KILL cr);
9976 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9977
9978 format %{ "addl $dst, $src\t# int" %}
9979 ins_encode %{
9980 __ addl($dst$$Register, $src$$constant);
9981 %}
9982 ins_pipe( ialu_reg );
9983 %}
9984
9985 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9986 %{
9987 predicate(UseAPX);
9988 match(Set dst (AddI src1 src2));
9989 effect(KILL cr);
9990 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9991
9992 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9993 ins_encode %{
9994 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9995 %}
9996 ins_pipe( ialu_reg );
9997 %}
9998
9999 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10000 %{
10001 predicate(UseAPX);
10002 match(Set dst (AddI (LoadI src1) src2));
10003 effect(KILL cr);
10004 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10005
10006 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10007 ins_encode %{
10008 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10009 %}
10010 ins_pipe( ialu_reg );
10011 %}
10012
10013 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10014 %{
10015 predicate(!UseAPX);
10016 match(Set dst (AddI dst (LoadI src)));
10017 effect(KILL cr);
10018 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10019
10020 ins_cost(150); // XXX
10021 format %{ "addl $dst, $src\t# int" %}
10022 ins_encode %{
10023 __ addl($dst$$Register, $src$$Address);
10024 %}
10025 ins_pipe(ialu_reg_mem);
10026 %}
10027
10028 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10029 %{
10030 predicate(UseAPX);
10031 match(Set dst (AddI src1 (LoadI src2)));
10032 effect(KILL cr);
10033 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10034
10035 ins_cost(150);
10036 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10037 ins_encode %{
10038 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10039 %}
10040 ins_pipe(ialu_reg_mem);
10041 %}
10042
10043 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10044 %{
10045 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10046 effect(KILL cr);
10047 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10048
10049 ins_cost(150); // XXX
10050 format %{ "addl $dst, $src\t# int" %}
10051 ins_encode %{
10052 __ addl($dst$$Address, $src$$Register);
10053 %}
10054 ins_pipe(ialu_mem_reg);
10055 %}
10056
10057 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10058 %{
10059 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10060 effect(KILL cr);
10061 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10062
10064 ins_cost(125); // XXX
10065 format %{ "addl $dst, $src\t# int" %}
10066 ins_encode %{
10067 __ addl($dst$$Address, $src$$constant);
10068 %}
10069 ins_pipe(ialu_mem_imm);
10070 %}
10071
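// inc/dec update all arithmetic flags except CF, which can cause partial-flag
// stalls on some cores; UseIncDec chooses between them and plain add/sub.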
10072 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10073 %{
10074 predicate(!UseAPX && UseIncDec);
10075 match(Set dst (AddI dst src));
10076 effect(KILL cr);
10077
10078 format %{ "incl $dst\t# int" %}
10079 ins_encode %{
10080 __ incrementl($dst$$Register);
10081 %}
10082 ins_pipe(ialu_reg);
10083 %}
10084
10085 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10086 %{
10087 predicate(UseAPX && UseIncDec);
10088 match(Set dst (AddI src val));
10089 effect(KILL cr);
10090 flag(PD::Flag_ndd_demotable_opr1);
10091
10092 format %{ "eincl $dst, $src\t# int ndd" %}
10093 ins_encode %{
10094 __ eincl($dst$$Register, $src$$Register, false);
10095 %}
10096 ins_pipe(ialu_reg);
10097 %}
10098
10099 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10100 %{
10101 predicate(UseAPX && UseIncDec);
10102 match(Set dst (AddI (LoadI src) val));
10103 effect(KILL cr);
10104
10105 format %{ "eincl $dst, $src\t# int ndd" %}
10106 ins_encode %{
10107 __ eincl($dst$$Register, $src$$Address, false);
10108 %}
10109 ins_pipe(ialu_reg);
10110 %}
10111
10112 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10113 %{
10114 predicate(UseIncDec);
10115 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10116 effect(KILL cr);
10117
10118 ins_cost(125); // XXX
10119 format %{ "incl $dst\t# int" %}
10120 ins_encode %{
10121 __ incrementl($dst$$Address);
10122 %}
10123 ins_pipe(ialu_mem_imm);
10124 %}
10125
// Uses AddI because C2 canonicalizes "x - 1" into "x + (-1)", so decrements reach
// the matcher as an AddI of immI_M1.
10127 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10128 %{
10129 predicate(!UseAPX && UseIncDec);
10130 match(Set dst (AddI dst src));
10131 effect(KILL cr);
10132
10133 format %{ "decl $dst\t# int" %}
10134 ins_encode %{
10135 __ decrementl($dst$$Register);
10136 %}
10137 ins_pipe(ialu_reg);
10138 %}
10139
10140 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10141 %{
10142 predicate(UseAPX && UseIncDec);
10143 match(Set dst (AddI src val));
10144 effect(KILL cr);
10145 flag(PD::Flag_ndd_demotable_opr1);
10146
10147 format %{ "edecl $dst, $src\t# int ndd" %}
10148 ins_encode %{
10149 __ edecl($dst$$Register, $src$$Register, false);
10150 %}
10151 ins_pipe(ialu_reg);
10152 %}
10153
10154 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10155 %{
10156 predicate(UseAPX && UseIncDec);
10157 match(Set dst (AddI (LoadI src) val));
10158 effect(KILL cr);
10159
10160 format %{ "edecl $dst, $src\t# int ndd" %}
10161 ins_encode %{
10162 __ edecl($dst$$Register, $src$$Address, false);
10163 %}
10164 ins_pipe(ialu_reg);
10165 %}
10166
// Uses AddI because C2 canonicalizes "x - 1" into "x + (-1)", so decrements reach
// the matcher as an AddI of immI_M1.
10168 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10169 %{
10170 predicate(UseIncDec);
10171 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10172 effect(KILL cr);
10173
10174 ins_cost(125); // XXX
10175 format %{ "decl $dst\t# int" %}
10176 ins_encode %{
10177 __ decrementl($dst$$Address);
10178 %}
10179 ins_pipe(ialu_mem_imm);
10180 %}
10181
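// On cores with fast lea (VM_Version::supports_fast_2op_lea / supports_fast_3op_lea),
// add/shift combinations are strength-reduced into a single lea with base, scaled
// index, and displacement. rbp/r13 are excluded as the base of the two-operand form
// (no_rbp_r13_RegI) because encoding them as a base forces an extra displacement byte.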
10182 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10183 %{
10184 predicate(VM_Version::supports_fast_2op_lea());
10185 match(Set dst (AddI (LShiftI index scale) disp));
10186
10187 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10188 ins_encode %{
10189 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10190 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10191 %}
10192 ins_pipe(ialu_reg_reg);
10193 %}
10194
10195 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10196 %{
10197 predicate(VM_Version::supports_fast_3op_lea());
10198 match(Set dst (AddI (AddI base index) disp));
10199
10200 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10201 ins_encode %{
10202 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10203 %}
10204 ins_pipe(ialu_reg_reg);
10205 %}
10206
10207 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10208 %{
10209 predicate(VM_Version::supports_fast_2op_lea());
10210 match(Set dst (AddI base (LShiftI index scale)));
10211
10212 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10213 ins_encode %{
10214 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10215 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10216 %}
10217 ins_pipe(ialu_reg_reg);
10218 %}
10219
10220 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10221 %{
10222 predicate(VM_Version::supports_fast_3op_lea());
10223 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10224
10225 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10226 ins_encode %{
10227 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10228 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10229 %}
10230 ins_pipe(ialu_reg_reg);
10231 %}
10232
10233 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10234 %{
10235 predicate(!UseAPX);
10236 match(Set dst (AddL dst src));
10237 effect(KILL cr);
10238 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10239
10240 format %{ "addq $dst, $src\t# long" %}
10241 ins_encode %{
10242 __ addq($dst$$Register, $src$$Register);
10243 %}
10244 ins_pipe(ialu_reg_reg);
10245 %}
10246
10247 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10248 %{
10249 predicate(UseAPX);
10250 match(Set dst (AddL src1 src2));
10251 effect(KILL cr);
10252 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10253
10254 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10255 ins_encode %{
10256 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10257 %}
10258 ins_pipe(ialu_reg_reg);
10259 %}
10260
10261 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10262 %{
10263 predicate(!UseAPX);
10264 match(Set dst (AddL dst src));
10265 effect(KILL cr);
10266 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10267
10268 format %{ "addq $dst, $src\t# long" %}
10269 ins_encode %{
10270 __ addq($dst$$Register, $src$$constant);
10271 %}
10272 ins_pipe( ialu_reg );
10273 %}
10274
10275 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10276 %{
10277 predicate(UseAPX);
10278 match(Set dst (AddL src1 src2));
10279 effect(KILL cr);
10280 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10281
10282 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10283 ins_encode %{
10284 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10285 %}
10286 ins_pipe( ialu_reg );
10287 %}
10288
10289 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10290 %{
10291 predicate(UseAPX);
10292 match(Set dst (AddL (LoadL src1) src2));
10293 effect(KILL cr);
10294 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10295
10296 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10297 ins_encode %{
10298 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10299 %}
10300 ins_pipe( ialu_reg );
10301 %}
10302
10303 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10304 %{
10305 predicate(!UseAPX);
10306 match(Set dst (AddL dst (LoadL src)));
10307 effect(KILL cr);
10308 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10309
10310 ins_cost(150); // XXX
10311 format %{ "addq $dst, $src\t# long" %}
10312 ins_encode %{
10313 __ addq($dst$$Register, $src$$Address);
10314 %}
10315 ins_pipe(ialu_reg_mem);
10316 %}
10317
10318 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10319 %{
10320 predicate(UseAPX);
10321 match(Set dst (AddL src1 (LoadL src2)));
10322 effect(KILL cr);
10323 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10324
10325 ins_cost(150);
10326 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10327 ins_encode %{
10328 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10329 %}
10330 ins_pipe(ialu_reg_mem);
10331 %}
10332
10333 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10334 %{
10335 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10336 effect(KILL cr);
10337 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10338
10339 ins_cost(150); // XXX
10340 format %{ "addq $dst, $src\t# long" %}
10341 ins_encode %{
10342 __ addq($dst$$Address, $src$$Register);
10343 %}
10344 ins_pipe(ialu_mem_reg);
10345 %}
10346
10347 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10348 %{
10349 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10350 effect(KILL cr);
10351 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10352
10353 ins_cost(125); // XXX
10354 format %{ "addq $dst, $src\t# long" %}
10355 ins_encode %{
10356 __ addq($dst$$Address, $src$$constant);
10357 %}
10358 ins_pipe(ialu_mem_imm);
10359 %}
10360
10361 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10362 %{
10363 predicate(!UseAPX && UseIncDec);
10364 match(Set dst (AddL dst src));
10365 effect(KILL cr);
10366
10367 format %{ "incq $dst\t# long" %}
10368 ins_encode %{
10369 __ incrementq($dst$$Register);
10370 %}
10371 ins_pipe(ialu_reg);
10372 %}
10373
10374 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10375 %{
10376 predicate(UseAPX && UseIncDec);
10377 match(Set dst (AddL src val));
10378 effect(KILL cr);
10379 flag(PD::Flag_ndd_demotable_opr1);
10380
10381 format %{ "eincq $dst, $src\t# long ndd" %}
10382 ins_encode %{
10383 __ eincq($dst$$Register, $src$$Register, false);
10384 %}
10385 ins_pipe(ialu_reg);
10386 %}
10387
10388 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10389 %{
10390 predicate(UseAPX && UseIncDec);
10391 match(Set dst (AddL (LoadL src) val));
10392 effect(KILL cr);
10393
10394 format %{ "eincq $dst, $src\t# long ndd" %}
10395 ins_encode %{
10396 __ eincq($dst$$Register, $src$$Address, false);
10397 %}
10398 ins_pipe(ialu_reg);
10399 %}
10400
10401 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10402 %{
10403 predicate(UseIncDec);
10404 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10405 effect(KILL cr);
10406
10407 ins_cost(125); // XXX
10408 format %{ "incq $dst\t# long" %}
10409 ins_encode %{
10410 __ incrementq($dst$$Address);
10411 %}
10412 ins_pipe(ialu_mem_imm);
10413 %}
10414
10415 // Uses AddL: a long decrement is canonicalized to (AddL x -1) in the ideal graph.
10416 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10417 %{
10418 predicate(!UseAPX && UseIncDec);
10419 match(Set dst (AddL dst src));
10420 effect(KILL cr);
10421
10422 format %{ "decq $dst\t# long" %}
10423 ins_encode %{
10424 __ decrementq($dst$$Register);
10425 %}
10426 ins_pipe(ialu_reg);
10427 %}
10428
10429 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10430 %{
10431 predicate(UseAPX && UseIncDec);
10432 match(Set dst (AddL src val));
10433 effect(KILL cr);
10434 flag(PD::Flag_ndd_demotable_opr1);
10435
10436 format %{ "edecq $dst, $src\t# long ndd" %}
10437 ins_encode %{
10438 __ edecq($dst$$Register, $src$$Register, false);
10439 %}
10440 ins_pipe(ialu_reg);
10441 %}
10442
10443 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10444 %{
10445 predicate(UseAPX && UseIncDec);
10446 match(Set dst (AddL (LoadL src) val));
10447 effect(KILL cr);
10448
10449 format %{ "edecq $dst, $src\t# long ndd" %}
10450 ins_encode %{
10451 __ edecq($dst$$Register, $src$$Address, false);
10452 %}
10453 ins_pipe(ialu_reg);
10454 %}
10455
10456 // Uses AddL: the store-form decrement likewise arrives as (AddL (LoadL dst) -1).
10457 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10458 %{
10459 predicate(UseIncDec);
10460 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10461 effect(KILL cr);
10462
10463 ins_cost(125); // XXX
10464 format %{ "decq $dst\t# long" %}
10465 ins_encode %{
10466 __ decrementq($dst$$Address);
10467 %}
10468 ins_pipe(ialu_mem_imm);
10469 %}
10470
10471 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10472 %{
10473 predicate(VM_Version::supports_fast_2op_lea());
10474 match(Set dst (AddL (LShiftL index scale) disp));
10475
10476 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10477 ins_encode %{
10478 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10479 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10480 %}
10481 ins_pipe(ialu_reg_reg);
10482 %}
10483
10484 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10485 %{
10486 predicate(VM_Version::supports_fast_3op_lea());
10487 match(Set dst (AddL (AddL base index) disp));
10488
10489 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10490 ins_encode %{
10491 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10492 %}
10493 ins_pipe(ialu_reg_reg);
10494 %}
10495
10496 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10497 %{
10498 predicate(VM_Version::supports_fast_2op_lea());
10499 match(Set dst (AddL base (LShiftL index scale)));
10500
10501 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10502 ins_encode %{
10503 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10504 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10505 %}
10506 ins_pipe(ialu_reg_reg);
10507 %}
10508
10509 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10510 %{
10511 predicate(VM_Version::supports_fast_3op_lea());
10512 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10513
10514 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10515 ins_encode %{
10516 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10517 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10518 %}
10519 ins_pipe(ialu_reg_reg);
10520 %}
10521
10522 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10523 %{
10524 match(Set dst (AddP dst src));
10525 effect(KILL cr);
10526 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10527
10528 format %{ "addq $dst, $src\t# ptr" %}
10529 ins_encode %{
10530 __ addq($dst$$Register, $src$$Register);
10531 %}
10532 ins_pipe(ialu_reg_reg);
10533 %}
10534
10535 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10536 %{
10537 match(Set dst (AddP dst src));
10538 effect(KILL cr);
10539 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10540
10541 format %{ "addq $dst, $src\t# ptr" %}
10542 ins_encode %{
10543 __ addq($dst$$Register, $src$$constant);
10544 %}
10545 ins_pipe( ialu_reg );
10546 %}
10547
10548 // XXX addP mem ops ????
10549
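// The cast rules below only carry type information for the optimizer; they are
// zero-size and emit no code, except when VerifyConstraintCasts requests the
// runtime range checks of the *_checked variants.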
10550 instruct checkCastPP(rRegP dst)
10551 %{
10552 match(Set dst (CheckCastPP dst));
10553
10554 size(0);
10555 format %{ "# checkcastPP of $dst" %}
10556 ins_encode(/* empty encoding */);
10557 ins_pipe(empty);
10558 %}
10559
10560 instruct castPP(rRegP dst)
10561 %{
10562 match(Set dst (CastPP dst));
10563
10564 size(0);
10565 format %{ "# castPP of $dst" %}
10566 ins_encode(/* empty encoding */);
10567 ins_pipe(empty);
10568 %}
10569
10570 instruct castII(rRegI dst)
10571 %{
10572 predicate(VerifyConstraintCasts == 0);
10573 match(Set dst (CastII dst));
10574
10575 size(0);
10576 format %{ "# castII of $dst" %}
10577 ins_encode(/* empty encoding */);
10578 ins_cost(0);
10579 ins_pipe(empty);
10580 %}
10581
10582 instruct castII_checked(rRegI dst, rFlagsReg cr)
10583 %{
10584 predicate(VerifyConstraintCasts > 0);
10585 match(Set dst (CastII dst));
10586
10587 effect(KILL cr);
10588 format %{ "# cast_checked_II $dst" %}
10589 ins_encode %{
10590 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10591 %}
10592 ins_pipe(pipe_slow);
10593 %}
10594
10595 instruct castLL(rRegL dst)
10596 %{
10597 predicate(VerifyConstraintCasts == 0);
10598 match(Set dst (CastLL dst));
10599
10600 size(0);
10601 format %{ "# castLL of $dst" %}
10602 ins_encode(/* empty encoding */);
10603 ins_cost(0);
10604 ins_pipe(empty);
10605 %}
10606
10607 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10608 %{
10609 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10610 match(Set dst (CastLL dst));
10611
10612 effect(KILL cr);
10613 format %{ "# cast_checked_LL $dst" %}
10614 ins_encode %{
10615 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10616 %}
10617 ins_pipe(pipe_slow);
10618 %}
10619
10620 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10621 %{
10622 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10623 match(Set dst (CastLL dst));
10624
10625 effect(KILL cr, TEMP tmp);
10626 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10627 ins_encode %{
10628 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10629 %}
10630 ins_pipe(pipe_slow);
10631 %}
10632
10633 instruct castFF(regF dst)
10634 %{
10635 match(Set dst (CastFF dst));
10636
10637 size(0);
10638 format %{ "# castFF of $dst" %}
10639 ins_encode(/* empty encoding */);
10640 ins_cost(0);
10641 ins_pipe(empty);
10642 %}
10643
10644 instruct castHH(regF dst)
10645 %{
10646 match(Set dst (CastHH dst));
10647
10648 size(0);
10649 format %{ "# castHH of $dst" %}
10650 ins_encode(/* empty encoding */);
10651 ins_cost(0);
10652 ins_pipe(empty);
10653 %}
10654
10655 instruct castDD(regD dst)
10656 %{
10657 match(Set dst (CastDD dst));
10658
10659 size(0);
10660 format %{ "# castDD of $dst" %}
10661 ins_encode(/* empty encoding */);
10662 ins_cost(0);
10663 ins_pipe(empty);
10664 %}
10665
10666 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10667 instruct compareAndSwapP(rRegI res,
10668 memory mem_ptr,
10669 rax_RegP oldval, rRegP newval,
10670 rFlagsReg cr)
10671 %{
10672 predicate(n->as_LoadStore()->barrier_data() == 0);
10673 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10674 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10675 effect(KILL cr, KILL oldval);
10676
10677 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10678 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10679 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10680 ins_encode %{
10681 __ lock();
10682 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10683 __ setcc(Assembler::equal, $res$$Register);
10684 %}
10685 ins_pipe( pipe_cmpxchg );
10686 %}
10687
10688 instruct compareAndSwapL(rRegI res,
10689 memory mem_ptr,
10690 rax_RegL oldval, rRegL newval,
10691 rFlagsReg cr)
10692 %{
10693 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10694 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10695 effect(KILL cr, KILL oldval);
10696
10697 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10698 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10699 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10700 ins_encode %{
10701 __ lock();
10702 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10703 __ setcc(Assembler::equal, $res$$Register);
10704 %}
10705 ins_pipe( pipe_cmpxchg );
10706 %}
10707
10708 instruct compareAndSwapI(rRegI res,
10709 memory mem_ptr,
10710 rax_RegI oldval, rRegI newval,
10711 rFlagsReg cr)
10712 %{
10713 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10714 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10715 effect(KILL cr, KILL oldval);
10716
10717 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10718 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10719 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10720 ins_encode %{
10721 __ lock();
10722 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10723 __ setcc(Assembler::equal, $res$$Register);
10724 %}
10725 ins_pipe( pipe_cmpxchg );
10726 %}
10727
10728 instruct compareAndSwapB(rRegI res,
10729 memory mem_ptr,
10730 rax_RegI oldval, rRegI newval,
10731 rFlagsReg cr)
10732 %{
10733 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10734 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10735 effect(KILL cr, KILL oldval);
10736
10737 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10738 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10739 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10740 ins_encode %{
10741 __ lock();
10742 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10743 __ setcc(Assembler::equal, $res$$Register);
10744 %}
10745 ins_pipe( pipe_cmpxchg );
10746 %}
10747
10748 instruct compareAndSwapS(rRegI res,
10749 memory mem_ptr,
10750 rax_RegI oldval, rRegI newval,
10751 rFlagsReg cr)
10752 %{
10753 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10754 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10755 effect(KILL cr, KILL oldval);
10756
10757 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10758 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10759 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10760 ins_encode %{
10761 __ lock();
10762 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10763 __ setcc(Assembler::equal, $res$$Register);
10764 %}
10765 ins_pipe( pipe_cmpxchg );
10766 %}
10767
10768 instruct compareAndSwapN(rRegI res,
10769 memory mem_ptr,
10770 rax_RegN oldval, rRegN newval,
10771 rFlagsReg cr) %{
10772 predicate(n->as_LoadStore()->barrier_data() == 0);
10773 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10774 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10775 effect(KILL cr, KILL oldval);
10776
10777 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10778 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10779 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10780 ins_encode %{
10781 __ lock();
10782 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10783 __ setcc(Assembler::equal, $res$$Register);
10784 %}
10785 ins_pipe( pipe_cmpxchg );
10786 %}
10787
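// CompareAndExchange differs from CompareAndSwap in its result: cmpxchg always
// leaves the value witnessed in memory in rax, so the old value is returned
// directly in $oldval and no setcc/movzbl sequence is needed.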
10788 instruct compareAndExchangeB(
10789 memory mem_ptr,
10790 rax_RegI oldval, rRegI newval,
10791 rFlagsReg cr)
10792 %{
10793 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10794 effect(KILL cr);
10795
10796 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10797 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10798 ins_encode %{
10799 __ lock();
10800 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10801 %}
10802 ins_pipe( pipe_cmpxchg );
10803 %}
10804
10805 instruct compareAndExchangeS(
10806 memory mem_ptr,
10807 rax_RegI oldval, rRegI newval,
10808 rFlagsReg cr)
10809 %{
10810 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10811 effect(KILL cr);
10812
10813 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10814 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10815 ins_encode %{
10816 __ lock();
10817 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10818 %}
10819 ins_pipe( pipe_cmpxchg );
10820 %}
10821
10822 instruct compareAndExchangeI(
10823 memory mem_ptr,
10824 rax_RegI oldval, rRegI newval,
10825 rFlagsReg cr)
10826 %{
10827 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10828 effect(KILL cr);
10829
10830 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10831 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10832 ins_encode %{
10833 __ lock();
10834 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10835 %}
10836 ins_pipe( pipe_cmpxchg );
10837 %}
10838
10839 instruct compareAndExchangeL(
10840 memory mem_ptr,
10841 rax_RegL oldval, rRegL newval,
10842 rFlagsReg cr)
10843 %{
10844 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10845 effect(KILL cr);
10846
10847 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10848 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10849 ins_encode %{
10850 __ lock();
10851 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10852 %}
10853 ins_pipe( pipe_cmpxchg );
10854 %}
10855
10856 instruct compareAndExchangeN(
10857 memory mem_ptr,
10858 rax_RegN oldval, rRegN newval,
10859 rFlagsReg cr) %{
10860 predicate(n->as_LoadStore()->barrier_data() == 0);
10861 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10862 effect(KILL cr);
10863
10864 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10865 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10866 ins_encode %{
10867 __ lock();
10868 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10869 %}
10870 ins_pipe( pipe_cmpxchg );
10871 %}
10872
10873 instruct compareAndExchangeP(
10874 memory mem_ptr,
10875 rax_RegP oldval, rRegP newval,
10876 rFlagsReg cr)
10877 %{
10878 predicate(n->as_LoadStore()->barrier_data() == 0);
10879 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10880 effect(KILL cr);
10881
10882 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10883 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10884 ins_encode %{
10885 __ lock();
10886 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10887 %}
10888 ins_pipe( pipe_cmpxchg );
10889 %}
10890
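// GetAndAdd whose result is unused degrades to a plain locked add: the
// *_no_res rules below avoid xadd's write-back of the previous value into a
// register.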
10891 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10892 predicate(n->as_LoadStore()->result_not_used());
10893 match(Set dummy (GetAndAddB mem add));
10894 effect(KILL cr);
10895 format %{ "addb_lock $mem, $add" %}
10896 ins_encode %{
10897 __ lock();
10898 __ addb($mem$$Address, $add$$Register);
10899 %}
10900 ins_pipe(pipe_cmpxchg);
10901 %}
10902
10903 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10904 predicate(n->as_LoadStore()->result_not_used());
10905 match(Set dummy (GetAndAddB mem add));
10906 effect(KILL cr);
10907 format %{ "addb_lock $mem, $add" %}
10908 ins_encode %{
10909 __ lock();
10910 __ addb($mem$$Address, $add$$constant);
10911 %}
10912 ins_pipe(pipe_cmpxchg);
10913 %}
10914
10915 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10916 predicate(!n->as_LoadStore()->result_not_used());
10917 match(Set newval (GetAndAddB mem newval));
10918 effect(KILL cr);
10919 format %{ "xaddb_lock $mem, $newval" %}
10920 ins_encode %{
10921 __ lock();
10922 __ xaddb($mem$$Address, $newval$$Register);
10923 %}
10924 ins_pipe(pipe_cmpxchg);
10925 %}
10926
10927 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10928 predicate(n->as_LoadStore()->result_not_used());
10929 match(Set dummy (GetAndAddS mem add));
10930 effect(KILL cr);
10931 format %{ "addw_lock $mem, $add" %}
10932 ins_encode %{
10933 __ lock();
10934 __ addw($mem$$Address, $add$$Register);
10935 %}
10936 ins_pipe(pipe_cmpxchg);
10937 %}
10938
10939 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10940 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10941 match(Set dummy (GetAndAddS mem add));
10942 effect(KILL cr);
10943 format %{ "addw_lock $mem, $add" %}
10944 ins_encode %{
10945 __ lock();
10946 __ addw($mem$$Address, $add$$constant);
10947 %}
10948 ins_pipe(pipe_cmpxchg);
10949 %}
10950
10951 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10952 predicate(!n->as_LoadStore()->result_not_used());
10953 match(Set newval (GetAndAddS mem newval));
10954 effect(KILL cr);
10955 format %{ "xaddw_lock $mem, $newval" %}
10956 ins_encode %{
10957 __ lock();
10958 __ xaddw($mem$$Address, $newval$$Register);
10959 %}
10960 ins_pipe(pipe_cmpxchg);
10961 %}
10962
10963 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10964 predicate(n->as_LoadStore()->result_not_used());
10965 match(Set dummy (GetAndAddI mem add));
10966 effect(KILL cr);
10967 format %{ "addl_lock $mem, $add" %}
10968 ins_encode %{
10969 __ lock();
10970 __ addl($mem$$Address, $add$$Register);
10971 %}
10972 ins_pipe(pipe_cmpxchg);
10973 %}
10974
10975 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10976 predicate(n->as_LoadStore()->result_not_used());
10977 match(Set dummy (GetAndAddI mem add));
10978 effect(KILL cr);
10979 format %{ "addl_lock $mem, $add" %}
10980 ins_encode %{
10981 __ lock();
10982 __ addl($mem$$Address, $add$$constant);
10983 %}
10984 ins_pipe(pipe_cmpxchg);
10985 %}
10986
10987 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10988 predicate(!n->as_LoadStore()->result_not_used());
10989 match(Set newval (GetAndAddI mem newval));
10990 effect(KILL cr);
10991 format %{ "xaddl_lock $mem, $newval" %}
10992 ins_encode %{
10993 __ lock();
10994 __ xaddl($mem$$Address, $newval$$Register);
10995 %}
10996 ins_pipe(pipe_cmpxchg);
10997 %}
10998
10999 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11000 predicate(n->as_LoadStore()->result_not_used());
11001 match(Set dummy (GetAndAddL mem add));
11002 effect(KILL cr);
11003 format %{ "addq_lock $mem, $add" %}
11004 ins_encode %{
11005 __ lock();
11006 __ addq($mem$$Address, $add$$Register);
11007 %}
11008 ins_pipe(pipe_cmpxchg);
11009 %}
11010
11011 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11012 predicate(n->as_LoadStore()->result_not_used());
11013 match(Set dummy (GetAndAddL mem add));
11014 effect(KILL cr);
11015 format %{ "addq_lock $mem, $add" %}
11016 ins_encode %{
11017 __ lock();
11018 __ addq($mem$$Address, $add$$constant);
11019 %}
11020 ins_pipe(pipe_cmpxchg);
11021 %}
11022
11023 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11024 predicate(!n->as_LoadStore()->result_not_used());
11025 match(Set newval (GetAndAddL mem newval));
11026 effect(KILL cr);
11027 format %{ "xaddq_lock $mem, $newval" %}
11028 ins_encode %{
11029 __ lock();
11030 __ xaddq($mem$$Address, $newval$$Register);
11031 %}
11032 ins_pipe(pipe_cmpxchg);
11033 %}
11034
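// xchg with a memory operand is implicitly locked on x86, so no lock() prefix
// is emitted; xchg also leaves the flags untouched, so there is no rFlagsReg
// effect.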
11035 instruct xchgB( memory mem, rRegI newval) %{
11036 match(Set newval (GetAndSetB mem newval));
11037 format %{ "XCHGB $newval,[$mem]" %}
11038 ins_encode %{
11039 __ xchgb($newval$$Register, $mem$$Address);
11040 %}
11041 ins_pipe( pipe_cmpxchg );
11042 %}
11043
11044 instruct xchgS( memory mem, rRegI newval) %{
11045 match(Set newval (GetAndSetS mem newval));
11046 format %{ "XCHGW $newval,[$mem]" %}
11047 ins_encode %{
11048 __ xchgw($newval$$Register, $mem$$Address);
11049 %}
11050 ins_pipe( pipe_cmpxchg );
11051 %}
11052
11053 instruct xchgI( memory mem, rRegI newval) %{
11054 match(Set newval (GetAndSetI mem newval));
11055 format %{ "XCHGL $newval,[$mem]" %}
11056 ins_encode %{
11057 __ xchgl($newval$$Register, $mem$$Address);
11058 %}
11059 ins_pipe( pipe_cmpxchg );
11060 %}
11061
11062 instruct xchgL( memory mem, rRegL newval) %{
11063 match(Set newval (GetAndSetL mem newval));
11064 format %{ "XCHGQ $newval,[$mem]" %}
11065 ins_encode %{
11066 __ xchgq($newval$$Register, $mem$$Address);
11067 %}
11068 ins_pipe( pipe_cmpxchg );
11069 %}
11070
11071 instruct xchgP( memory mem, rRegP newval) %{
11072 match(Set newval (GetAndSetP mem newval));
11073 predicate(n->as_LoadStore()->barrier_data() == 0);
11074 format %{ "XCHGQ $newval,[$mem]" %}
11075 ins_encode %{
11076 __ xchgq($newval$$Register, $mem$$Address);
11077 %}
11078 ins_pipe( pipe_cmpxchg );
11079 %}
11080
11081 instruct xchgN( memory mem, rRegN newval) %{
11082 predicate(n->as_LoadStore()->barrier_data() == 0);
11083 match(Set newval (GetAndSetN mem newval));
11084 format %{ "XCHGL $newval,[$mem]" %}
11085 ins_encode %{
11086 __ xchgl($newval$$Register, $mem$$Address);
11087 %}
11088 ins_pipe( pipe_cmpxchg );
11089 %}
11090
11091 //----------Abs Instructions-------------------------------------------
11092
11093 // Integer Absolute Instructions
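// abs is computed branchlessly: dst = 0 - src sets the flags, and cmov-if-less
// replaces a negative result with src itself. E.g. src = 5: dst = -5, SF set,
// cmovl copies 5; src = -5: dst = 5, cmovl not taken. For min_jint the
// subtraction overflows (SF == OF), so the negated value is kept, matching
// Java's Math.abs semantics.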
11094 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11095 %{
11096 match(Set dst (AbsI src));
11097 effect(TEMP dst, KILL cr);
11098 format %{ "xorl $dst, $dst\t# abs int\n\t"
11099 "subl $dst, $src\n\t"
11100 "cmovll $dst, $src" %}
11101 ins_encode %{
11102 __ xorl($dst$$Register, $dst$$Register);
11103 __ subl($dst$$Register, $src$$Register);
11104 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11105 %}
11106
11107 ins_pipe(ialu_reg_reg);
11108 %}
11109
11110 // Long Absolute Instructions
11111 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11112 %{
11113 match(Set dst (AbsL src));
11114 effect(TEMP dst, KILL cr);
11115 format %{ "xorl $dst, $dst\t# abs long\n\t"
11116 "subq $dst, $src\n\t"
11117 "cmovlq $dst, $src" %}
11118 ins_encode %{
11119 __ xorl($dst$$Register, $dst$$Register);
11120 __ subq($dst$$Register, $src$$Register);
11121 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11122 %}
11123
11124 ins_pipe(ialu_reg_reg);
11125 %}
11126
11127 //----------Subtraction Instructions-------------------------------------------
11128
11129 // Integer Subtraction Instructions
11130 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11131 %{
11132 predicate(!UseAPX);
11133 match(Set dst (SubI dst src));
11134 effect(KILL cr);
11135 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11136
11137 format %{ "subl $dst, $src\t# int" %}
11138 ins_encode %{
11139 __ subl($dst$$Register, $src$$Register);
11140 %}
11141 ins_pipe(ialu_reg_reg);
11142 %}
11143
11144 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11145 %{
11146 predicate(UseAPX);
11147 match(Set dst (SubI src1 src2));
11148 effect(KILL cr);
11149 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11150
11151 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11152 ins_encode %{
11153 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11154 %}
11155 ins_pipe(ialu_reg_reg);
11156 %}
11157
11158 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11159 %{
11160 predicate(UseAPX);
11161 match(Set dst (SubI src1 src2));
11162 effect(KILL cr);
11163 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11164
11165 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11166 ins_encode %{
11167 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11168 %}
11169 ins_pipe(ialu_reg_reg);
11170 %}
11171
11172 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11173 %{
11174 predicate(UseAPX);
11175 match(Set dst (SubI (LoadI src1) src2));
11176 effect(KILL cr);
11177 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178
11179 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11180 ins_encode %{
11181 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11182 %}
11183 ins_pipe(ialu_reg_reg);
11184 %}
11185
11186 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11187 %{
11188 predicate(!UseAPX);
11189 match(Set dst (SubI dst (LoadI src)));
11190 effect(KILL cr);
11191 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11192
11193 ins_cost(150);
11194 format %{ "subl $dst, $src\t# int" %}
11195 ins_encode %{
11196 __ subl($dst$$Register, $src$$Address);
11197 %}
11198 ins_pipe(ialu_reg_mem);
11199 %}
11200
11201 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11202 %{
11203 predicate(UseAPX);
11204 match(Set dst (SubI src1 (LoadI src2)));
11205 effect(KILL cr);
11206 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11207
11208 ins_cost(150);
11209 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11210 ins_encode %{
11211 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11212 %}
11213 ins_pipe(ialu_reg_mem);
11214 %}
11215
11216 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11217 %{
11218 predicate(UseAPX);
11219 match(Set dst (SubI (LoadI src1) src2));
11220 effect(KILL cr);
11221 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11222
11223 ins_cost(150);
11224 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11225 ins_encode %{
11226 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11227 %}
11228 ins_pipe(ialu_reg_mem);
11229 %}
11230
11231 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11232 %{
11233 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11234 effect(KILL cr);
11235 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11236
11237 ins_cost(150);
11238 format %{ "subl $dst, $src\t# int" %}
11239 ins_encode %{
11240 __ subl($dst$$Address, $src$$Register);
11241 %}
11242 ins_pipe(ialu_mem_reg);
11243 %}
11244
11245 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11246 %{
11247 predicate(!UseAPX);
11248 match(Set dst (SubL dst src));
11249 effect(KILL cr);
11250 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11251
11252 format %{ "subq $dst, $src\t# long" %}
11253 ins_encode %{
11254 __ subq($dst$$Register, $src$$Register);
11255 %}
11256 ins_pipe(ialu_reg_reg);
11257 %}
11258
11259 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11260 %{
11261 predicate(UseAPX);
11262 match(Set dst (SubL src1 src2));
11263 effect(KILL cr);
11264 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11265
11266 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11267 ins_encode %{
11268 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11269 %}
11270 ins_pipe(ialu_reg_reg);
11271 %}
11272
11273 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11274 %{
11275 predicate(UseAPX);
11276 match(Set dst (SubL src1 src2));
11277 effect(KILL cr);
11278 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11279
11280 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11281 ins_encode %{
11282 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11283 %}
11284 ins_pipe(ialu_reg_reg);
11285 %}
11286
11287 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11288 %{
11289 predicate(UseAPX);
11290 match(Set dst (SubL (LoadL src1) src2));
11291 effect(KILL cr);
11292 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11293
11294 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11295 ins_encode %{
11296 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11297 %}
11298 ins_pipe(ialu_reg_reg);
11299 %}
11300
11301 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11302 %{
11303 predicate(!UseAPX);
11304 match(Set dst (SubL dst (LoadL src)));
11305 effect(KILL cr);
11306 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11307
11308 ins_cost(150);
11309 format %{ "subq $dst, $src\t# long" %}
11310 ins_encode %{
11311 __ subq($dst$$Register, $src$$Address);
11312 %}
11313 ins_pipe(ialu_reg_mem);
11314 %}
11315
11316 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11317 %{
11318 predicate(UseAPX);
11319 match(Set dst (SubL src1 (LoadL src2)));
11320 effect(KILL cr);
11321 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11322
11323 ins_cost(150);
11324 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11325 ins_encode %{
11326 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11327 %}
11328 ins_pipe(ialu_reg_mem);
11329 %}
11330
11331 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11332 %{
11333 predicate(UseAPX);
11334 match(Set dst (SubL (LoadL src1) src2));
11335 effect(KILL cr);
11336 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11337
11338 ins_cost(150);
11339 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11340 ins_encode %{
11341 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11342 %}
11343 ins_pipe(ialu_reg_mem);
11344 %}
11345
11346 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11347 %{
11348 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11349 effect(KILL cr);
11350 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11351
11352 ins_cost(150);
11353 format %{ "subq $dst, $src\t# long" %}
11354 ins_encode %{
11355 __ subq($dst$$Address, $src$$Register);
11356 %}
11357 ins_pipe(ialu_mem_reg);
11358 %}
11359
11360 // Subtract from a pointer
11361 // There is no SubP node: pointer-minus-int reaches the matcher as (AddP dst (SubI 0 src)), so the negation folds into the subq.
11362 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11363 %{
11364 match(Set dst (AddP dst (SubI zero src)));
11365 effect(KILL cr);
11366
11367 format %{ "subq $dst, $src\t# ptr - int" %}
11368 ins_encode %{
11369 __ subq($dst$$Register, $src$$Register);
11370 %}
11371 ins_pipe(ialu_reg_reg);
11372 %}
11373
11374 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11375 %{
11376 predicate(!UseAPX);
11377 match(Set dst (SubI zero dst));
11378 effect(KILL cr);
11379 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11380
11381 format %{ "negl $dst\t# int" %}
11382 ins_encode %{
11383 __ negl($dst$$Register);
11384 %}
11385 ins_pipe(ialu_reg);
11386 %}
11387
11388 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11389 %{
11390 predicate(UseAPX);
11391 match(Set dst (SubI zero src));
11392 effect(KILL cr);
11393 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11394
11395 format %{ "enegl $dst, $src\t# int ndd" %}
11396 ins_encode %{
11397 __ enegl($dst$$Register, $src$$Register, false);
11398 %}
11399 ins_pipe(ialu_reg);
11400 %}
11401
11402 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11403 %{
11404 predicate(!UseAPX);
11405 match(Set dst (NegI dst));
11406 effect(KILL cr);
11407 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11408
11409 format %{ "negl $dst\t# int" %}
11410 ins_encode %{
11411 __ negl($dst$$Register);
11412 %}
11413 ins_pipe(ialu_reg);
11414 %}
11415
11416 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11417 %{
11418 predicate(UseAPX);
11419 match(Set dst (NegI src));
11420 effect(KILL cr);
11421 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11422
11423 format %{ "enegl $dst, $src\t# int ndd" %}
11424 ins_encode %{
11425 __ enegl($dst$$Register, $src$$Register, false);
11426 %}
11427 ins_pipe(ialu_reg);
11428 %}
11429
11430 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11431 %{
11432 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11433 effect(KILL cr);
11434 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11435
11436 format %{ "negl $dst\t# int" %}
11437 ins_encode %{
11438 __ negl($dst$$Address);
11439 %}
11440 ins_pipe(ialu_reg);
11441 %}
11442
11443 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11444 %{
11445 predicate(!UseAPX);
11446 match(Set dst (SubL zero dst));
11447 effect(KILL cr);
11448 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11449
11450 format %{ "negq $dst\t# long" %}
11451 ins_encode %{
11452 __ negq($dst$$Register);
11453 %}
11454 ins_pipe(ialu_reg);
11455 %}
11456
11457 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11458 %{
11459 predicate(UseAPX);
11460 match(Set dst (SubL zero src));
11461 effect(KILL cr);
11462 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11463
11464 format %{ "enegq $dst, $src\t# long ndd" %}
11465 ins_encode %{
11466 __ enegq($dst$$Register, $src$$Register, false);
11467 %}
11468 ins_pipe(ialu_reg);
11469 %}
11470
11471 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11472 %{
11473 predicate(!UseAPX);
11474 match(Set dst (NegL dst));
11475 effect(KILL cr);
11476 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11477
11478 format %{ "negq $dst\t# long" %}
11479 ins_encode %{
11480 __ negq($dst$$Register);
11481 %}
11482 ins_pipe(ialu_reg);
11483 %}
11484
11485 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11486 %{
11487 predicate(UseAPX);
11488 match(Set dst (NegL src));
11489 effect(KILL cr);
11490 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11491
11492 format %{ "enegq $dst, $src\t# long ndd" %}
11493 ins_encode %{
11494 __ enegq($dst$$Register, $src$$Register, false);
11495 %}
11496 ins_pipe(ialu_reg);
11497 %}
11498
11499 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11500 %{
11501 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11502 effect(KILL cr);
11503 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11504
11505 format %{ "negq $dst\t# long" %}
11506 ins_encode %{
11507 __ negq($dst$$Address);
11508 %}
11509 ins_pipe(ialu_reg);
11510 %}
11511
11512 //----------Multiplication/Division Instructions-------------------------------
11513 // Integer Multiplication Instructions
11514 // Multiply Register
11515
11516 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11517 %{
11518 predicate(!UseAPX);
11519 match(Set dst (MulI dst src));
11520 effect(KILL cr);
11521
11522 ins_cost(300);
11523 format %{ "imull $dst, $src\t# int" %}
11524 ins_encode %{
11525 __ imull($dst$$Register, $src$$Register);
11526 %}
11527 ins_pipe(ialu_reg_reg_alu0);
11528 %}
11529
11530 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11531 %{
11532 predicate(UseAPX);
11533 match(Set dst (MulI src1 src2));
11534 effect(KILL cr);
11535 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11536
11537 ins_cost(300);
11538 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11539 ins_encode %{
11540 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11541 %}
11542 ins_pipe(ialu_reg_reg_alu0);
11543 %}
11544
11545 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11546 %{
11547 match(Set dst (MulI src imm));
11548 effect(KILL cr);
11549
11550 ins_cost(300);
11551 format %{ "imull $dst, $src, $imm\t# int" %}
11552 ins_encode %{
11553 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11554 %}
11555 ins_pipe(ialu_reg_reg_alu0);
11556 %}
11557
11558 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11559 %{
11560 predicate(!UseAPX);
11561 match(Set dst (MulI dst (LoadI src)));
11562 effect(KILL cr);
11563
11564 ins_cost(350);
11565 format %{ "imull $dst, $src\t# int" %}
11566 ins_encode %{
11567 __ imull($dst$$Register, $src$$Address);
11568 %}
11569 ins_pipe(ialu_reg_mem_alu0);
11570 %}
11571
11572 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11573 %{
11574 predicate(UseAPX);
11575 match(Set dst (MulI src1 (LoadI src2)));
11576 effect(KILL cr);
11577 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11578
11579 ins_cost(350);
11580 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11581 ins_encode %{
11582 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11583 %}
11584 ins_pipe(ialu_reg_mem_alu0);
11585 %}
11586
11587 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11588 %{
11589 match(Set dst (MulI (LoadI src) imm));
11590 effect(KILL cr);
11591
11592 ins_cost(300);
11593 format %{ "imull $dst, $src, $imm\t# int" %}
11594 ins_encode %{
11595 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11596 %}
11597 ins_pipe(ialu_reg_mem_alu0);
11598 %}
11599
11600 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11601 %{
11602 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11603 effect(KILL cr, KILL src2);
11604
11605 expand %{ mulI_rReg(dst, src1, cr);
11606 mulI_rReg(src2, src3, cr);
11607 addI_rReg(dst, src2, cr); %}
11608 %}
11609
11610 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11611 %{
11612 predicate(!UseAPX);
11613 match(Set dst (MulL dst src));
11614 effect(KILL cr);
11615
11616 ins_cost(300);
11617 format %{ "imulq $dst, $src\t# long" %}
11618 ins_encode %{
11619 __ imulq($dst$$Register, $src$$Register);
11620 %}
11621 ins_pipe(ialu_reg_reg_alu0);
11622 %}
11623
11624 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11625 %{
11626 predicate(UseAPX);
11627 match(Set dst (MulL src1 src2));
11628 effect(KILL cr);
11629 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11630
11631 ins_cost(300);
11632 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11633 ins_encode %{
11634 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11635 %}
11636 ins_pipe(ialu_reg_reg_alu0);
11637 %}
11638
11639 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11640 %{
11641 match(Set dst (MulL src imm));
11642 effect(KILL cr);
11643
11644 ins_cost(300);
11645 format %{ "imulq $dst, $src, $imm\t# long" %}
11646 ins_encode %{
11647 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11648 %}
11649 ins_pipe(ialu_reg_reg_alu0);
11650 %}
11651
11652 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11653 %{
11654 predicate(!UseAPX);
11655 match(Set dst (MulL dst (LoadL src)));
11656 effect(KILL cr);
11657
11658 ins_cost(350);
11659 format %{ "imulq $dst, $src\t# long" %}
11660 ins_encode %{
11661 __ imulq($dst$$Register, $src$$Address);
11662 %}
11663 ins_pipe(ialu_reg_mem_alu0);
11664 %}
11665
11666 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11667 %{
11668 predicate(UseAPX);
11669 match(Set dst (MulL src1 (LoadL src2)));
11670 effect(KILL cr);
11671 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11672
11673 ins_cost(350);
11674 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11675 ins_encode %{
11676 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11677 %}
11678 ins_pipe(ialu_reg_mem_alu0);
11679 %}
11680
11681 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11682 %{
11683 match(Set dst (MulL (LoadL src) imm));
11684 effect(KILL cr);
11685
11686 ins_cost(300);
11687 format %{ "imulq $dst, $src, $imm\t# long" %}
11688 ins_encode %{
11689 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11690 %}
11691 ins_pipe(ialu_reg_mem_alu0);
11692 %}
11693
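// The one-operand imulq/mulq forms produce the full 128-bit product in
// rdx:rax; the MulHiL/UMulHiL rules below return the high half from rdx.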
11694 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11695 %{
11696 match(Set dst (MulHiL src rax));
11697 effect(USE_KILL rax, KILL cr);
11698
11699 ins_cost(300);
11700 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11701 ins_encode %{
11702 __ imulq($src$$Register);
11703 %}
11704 ins_pipe(ialu_reg_reg_alu0);
11705 %}
11706
11707 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11708 %{
11709 match(Set dst (UMulHiL src rax));
11710 effect(USE_KILL rax, KILL cr);
11711
11712 ins_cost(300);
11713 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11714 ins_encode %{
11715 __ mulq($src$$Register);
11716 %}
11717 ins_pipe(ialu_reg_reg_alu0);
11718 %}
11719
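// x86 idiv raises #DE when the quotient overflows, which happens for
// min_jint / -1 (and min_jlong / -1 with idivq). Java defines that result as
// min_jint (min_jlong), so the encodings below special-case a -1 divisor when
// the dividend is the minimum value and skip the divide.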
11720 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11721 rFlagsReg cr)
11722 %{
11723 match(Set rax (DivI rax div));
11724 effect(KILL rdx, KILL cr);
11725
11726 ins_cost(30*100+10*100); // XXX
11727 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11728 "jne,s normal\n\t"
11729 "xorl rdx, rdx\n\t"
11730 "cmpl $div, -1\n\t"
11731 "je,s done\n"
11732 "normal: cdql\n\t"
11733 "idivl $div\n"
11734 "done:" %}
11735 ins_encode(cdql_enc(div));
11736 ins_pipe(ialu_reg_reg_alu0);
11737 %}
11738
11739 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11740 rFlagsReg cr)
11741 %{
11742 match(Set rax (DivL rax div));
11743 effect(KILL rdx, KILL cr);
11744
11745 ins_cost(30*100+10*100); // XXX
11746 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11747 "cmpq rax, rdx\n\t"
11748 "jne,s normal\n\t"
11749 "xorl rdx, rdx\n\t"
11750 "cmpq $div, -1\n\t"
11751 "je,s done\n"
11752 "normal: cdqq\n\t"
11753 "idivq $div\n"
11754 "done:" %}
11755 ins_encode(cdqq_enc(div));
11756 ins_pipe(ialu_reg_reg_alu0);
11757 %}
11758
11759 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11760 %{
11761 match(Set rax (UDivI rax div));
11762 effect(KILL rdx, KILL cr);
11763
11764 ins_cost(300);
11765 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11766 ins_encode %{
11767 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11768 %}
11769 ins_pipe(ialu_reg_reg_alu0);
11770 %}
11771
11772 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11773 %{
11774 match(Set rax (UDivL rax div));
11775 effect(KILL rdx, KILL cr);
11776
11777 ins_cost(300);
11778 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11779 ins_encode %{
11780 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11781 %}
11782 ins_pipe(ialu_reg_reg_alu0);
11783 %}
11784
11785 // Integer DIVMOD with Register, both quotient and mod results
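// idiv leaves the quotient in rax and the remainder in rdx, so one
// instruction serves both results of a DivModI node.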
11786 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11787 rFlagsReg cr)
11788 %{
11789 match(DivModI rax div);
11790 effect(KILL cr);
11791
11792 ins_cost(30*100+10*100); // XXX
11793 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11794 "jne,s normal\n\t"
11795 "xorl rdx, rdx\n\t"
11796 "cmpl $div, -1\n\t"
11797 "je,s done\n"
11798 "normal: cdql\n\t"
11799 "idivl $div\n"
11800 "done:" %}
11801 ins_encode(cdql_enc(div));
11802 ins_pipe(pipe_slow);
11803 %}
11804
11805 // Long DIVMOD with Register, both quotient and mod results
11806 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11807 rFlagsReg cr)
11808 %{
11809 match(DivModL rax div);
11810 effect(KILL cr);
11811
11812 ins_cost(30*100+10*100); // XXX
11813 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11814 "cmpq rax, rdx\n\t"
11815 "jne,s normal\n\t"
11816 "xorl rdx, rdx\n\t"
11817 "cmpq $div, -1\n\t"
11818 "je,s done\n"
11819 "normal: cdqq\n\t"
11820 "idivq $div\n"
11821 "done:" %}
11822 ins_encode(cdqq_enc(div));
11823 ins_pipe(pipe_slow);
11824 %}
11825
11826 // Unsigned integer DIVMOD with Register, both quotient and mod results
11827 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11828 no_rax_rdx_RegI div, rFlagsReg cr)
11829 %{
11830 match(UDivModI rax div);
11831 effect(TEMP tmp, KILL cr);
11832
11833 ins_cost(300);
11834 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11835 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11836 %}
11837 ins_encode %{
11838 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11839 %}
11840 ins_pipe(pipe_slow);
11841 %}
11842
11843 // Unsigned long DIVMOD with Register, both quotient and mod results
11844 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11845 no_rax_rdx_RegL div, rFlagsReg cr)
11846 %{
11847 match(UDivModL rax div);
11848 effect(TEMP tmp, KILL cr);
11849
11850 ins_cost(300);
11851 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11852 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11853 %}
11854 ins_encode %{
11855 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11856 %}
11857 ins_pipe(pipe_slow);
11858 %}
11859
11860 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11861 rFlagsReg cr)
11862 %{
11863 match(Set rdx (ModI rax div));
11864 effect(KILL rax, KILL cr);
11865
11866 ins_cost(300); // XXX
11867 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11868 "jne,s normal\n\t"
11869 "xorl rdx, rdx\n\t"
11870 "cmpl $div, -1\n\t"
11871 "je,s done\n"
11872 "normal: cdql\n\t"
11873 "idivl $div\n"
11874 "done:" %}
11875 ins_encode(cdql_enc(div));
11876 ins_pipe(ialu_reg_reg_alu0);
11877 %}
11878
11879 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11880 rFlagsReg cr)
11881 %{
11882 match(Set rdx (ModL rax div));
11883 effect(KILL rax, KILL cr);
11884
11885 ins_cost(300); // XXX
11886 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11887 "cmpq rax, rdx\n\t"
11888 "jne,s normal\n\t"
11889 "xorl rdx, rdx\n\t"
11890 "cmpq $div, -1\n\t"
11891 "je,s done\n"
11892 "normal: cdqq\n\t"
11893 "idivq $div\n"
11894 "done:" %}
11895 ins_encode(cdqq_enc(div));
11896 ins_pipe(ialu_reg_reg_alu0);
11897 %}
11898
11899 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11900 %{
11901 match(Set rdx (UModI rax div));
11902 effect(KILL rax, KILL cr);
11903
11904 ins_cost(300);
11905 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11906 ins_encode %{
11907 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11908 %}
11909 ins_pipe(ialu_reg_reg_alu0);
11910 %}
11911
11912 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11913 %{
11914 match(Set rdx (UModL rax div));
11915 effect(KILL rax, KILL cr);
11916
11917 ins_cost(300);
11918 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11919 ins_encode %{
11920 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11921 %}
11922 ins_pipe(ialu_reg_reg_alu0);
11923 %}
11924
11925 // Integer Shift Instructions
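// Without BMI2 a variable shift count must live in CL, hence the rcx_RegI
// operand and the implicit-count encodings in the *_CL rules; the BMI2
// shlx/sarx rules take the count in any register.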
11926 // Shift Left by one, two, three
11927 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11928 %{
11929 predicate(!UseAPX);
11930 match(Set dst (LShiftI dst shift));
11931 effect(KILL cr);
11932
11933 format %{ "sall $dst, $shift" %}
11934 ins_encode %{
11935 __ sall($dst$$Register, $shift$$constant);
11936 %}
11937 ins_pipe(ialu_reg);
11938 %}
11939
11940 // Shift Left by one, two, three
11941 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11942 %{
11943 predicate(UseAPX);
11944 match(Set dst (LShiftI src shift));
11945 effect(KILL cr);
11946 flag(PD::Flag_ndd_demotable_opr1);
11947
11948 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11949 ins_encode %{
11950 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11951 %}
11952 ins_pipe(ialu_reg);
11953 %}
11954
11955 // Shift Left by 8-bit immediate
11956 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11957 %{
11958 predicate(!UseAPX);
11959 match(Set dst (LShiftI dst shift));
11960 effect(KILL cr);
11961
11962 format %{ "sall $dst, $shift" %}
11963 ins_encode %{
11964 __ sall($dst$$Register, $shift$$constant);
11965 %}
11966 ins_pipe(ialu_reg);
11967 %}
11968
11969 // Shift Left by 8-bit immediate
11970 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11971 %{
11972 predicate(UseAPX);
11973 match(Set dst (LShiftI src shift));
11974 effect(KILL cr);
11975 flag(PD::Flag_ndd_demotable_opr1);
11976
11977 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11978 ins_encode %{
11979 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11980 %}
11981 ins_pipe(ialu_reg);
11982 %}
11983
11984 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11985 %{
11986 predicate(UseAPX);
11987 match(Set dst (LShiftI (LoadI src) shift));
11988 effect(KILL cr);
11989
11990 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11991 ins_encode %{
11992 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11993 %}
11994 ins_pipe(ialu_reg);
11995 %}
11996
11997 // Shift Left by 8-bit immediate
11998 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11999 %{
12000 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12001 effect(KILL cr);
12002
12003 format %{ "sall $dst, $shift" %}
12004 ins_encode %{
12005 __ sall($dst$$Address, $shift$$constant);
12006 %}
12007 ins_pipe(ialu_mem_imm);
12008 %}
12009
12010 // Shift Left by variable
12011 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12012 %{
12013 predicate(!VM_Version::supports_bmi2());
12014 match(Set dst (LShiftI dst shift));
12015 effect(KILL cr);
12016
12017 format %{ "sall $dst, $shift" %}
12018 ins_encode %{
12019 __ sall($dst$$Register);
12020 %}
12021 ins_pipe(ialu_reg_reg);
12022 %}
12023
12024 // Shift Left by variable
12025 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12026 %{
12027 predicate(!VM_Version::supports_bmi2());
12028 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12029 effect(KILL cr);
12030
12031 format %{ "sall $dst, $shift" %}
12032 ins_encode %{
12033 __ sall($dst$$Address);
12034 %}
12035 ins_pipe(ialu_mem_reg);
12036 %}
12037
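// BMI2 shlxl/sarxl write a separate destination and leave the flags
// untouched, so these rules need neither a two-address constraint nor a
// KILL cr effect.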
12038 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12039 %{
12040 predicate(VM_Version::supports_bmi2());
12041 match(Set dst (LShiftI src shift));
12042
12043 format %{ "shlxl $dst, $src, $shift" %}
12044 ins_encode %{
12045 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12046 %}
12047 ins_pipe(ialu_reg_reg);
12048 %}
12049
12050 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12051 %{
12052 predicate(VM_Version::supports_bmi2());
12053 match(Set dst (LShiftI (LoadI src) shift));
12054 ins_cost(175);
12055 format %{ "shlxl $dst, $src, $shift" %}
12056 ins_encode %{
12057 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12058 %}
12059 ins_pipe(ialu_reg_mem);
12060 %}
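// The BMI2 shlx/shrx/sarx forms accept the shift count in any general
// register and do not write EFLAGS, which is why these patterns need neither
// an rcx_RegI operand nor an effect(KILL cr).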
12061
12062 // Arithmetic Shift Right by 8-bit immediate
12063 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12064 %{
12065 predicate(!UseAPX);
12066 match(Set dst (RShiftI dst shift));
12067 effect(KILL cr);
12068
12069 format %{ "sarl $dst, $shift" %}
12070 ins_encode %{
12071 __ sarl($dst$$Register, $shift$$constant);
12072 %}
12073 ins_pipe(ialu_mem_imm);
12074 %}
12075
12076 // Arithmetic Shift Right by 8-bit immediate
12077 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12078 %{
12079 predicate(UseAPX);
12080 match(Set dst (RShiftI src shift));
12081 effect(KILL cr);
12082 flag(PD::Flag_ndd_demotable_opr1);
12083
12084 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12085 ins_encode %{
12086 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12087 %}
12088 ins_pipe(ialu_mem_imm);
12089 %}
12090
12091 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12092 %{
12093 predicate(UseAPX);
12094 match(Set dst (RShiftI (LoadI src) shift));
12095 effect(KILL cr);
12096
12097 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12098 ins_encode %{
12099 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12100 %}
12101 ins_pipe(ialu_mem_imm);
12102 %}
12103
12104 // Arithmetic Shift Right by 8-bit immediate
12105 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12106 %{
12107 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12108 effect(KILL cr);
12109
12110 format %{ "sarl $dst, $shift" %}
12111 ins_encode %{
12112 __ sarl($dst$$Address, $shift$$constant);
12113 %}
12114 ins_pipe(ialu_mem_imm);
12115 %}
12116
12117 // Arithmetic Shift Right by variable
12118 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12119 %{
12120 predicate(!VM_Version::supports_bmi2());
12121 match(Set dst (RShiftI dst shift));
12122 effect(KILL cr);
12123
12124 format %{ "sarl $dst, $shift" %}
12125 ins_encode %{
12126 __ sarl($dst$$Register);
12127 %}
12128 ins_pipe(ialu_reg_reg);
12129 %}
12130
12131 // Arithmetic Shift Right by variable
12132 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12133 %{
12134 predicate(!VM_Version::supports_bmi2());
12135 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12136 effect(KILL cr);
12137
12138 format %{ "sarl $dst, $shift" %}
12139 ins_encode %{
12140 __ sarl($dst$$Address);
12141 %}
12142 ins_pipe(ialu_mem_reg);
12143 %}
12144
12145 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12146 %{
12147 predicate(VM_Version::supports_bmi2());
12148 match(Set dst (RShiftI src shift));
12149
12150 format %{ "sarxl $dst, $src, $shift" %}
12151 ins_encode %{
12152 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12153 %}
12154 ins_pipe(ialu_reg_reg);
12155 %}
12156
12157 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12158 %{
12159 predicate(VM_Version::supports_bmi2());
12160 match(Set dst (RShiftI (LoadI src) shift));
12161 ins_cost(175);
12162 format %{ "sarxl $dst, $src, $shift" %}
12163 ins_encode %{
12164 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12165 %}
12166 ins_pipe(ialu_reg_mem);
12167 %}
12168
12169 // Logical Shift Right by 8-bit immediate
12170 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12171 %{
12172 predicate(!UseAPX);
12173 match(Set dst (URShiftI dst shift));
12174 effect(KILL cr);
12175
12176 format %{ "shrl $dst, $shift" %}
12177 ins_encode %{
12178 __ shrl($dst$$Register, $shift$$constant);
12179 %}
12180 ins_pipe(ialu_reg);
12181 %}
12182
12183 // Logical Shift Right by 8-bit immediate
12184 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12185 %{
12186 predicate(UseAPX);
12187 match(Set dst (URShiftI src shift));
12188 effect(KILL cr);
12189 flag(PD::Flag_ndd_demotable_opr1);
12190
  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12192 ins_encode %{
12193 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12194 %}
12195 ins_pipe(ialu_reg);
12196 %}
12197
12198 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12199 %{
12200 predicate(UseAPX);
12201 match(Set dst (URShiftI (LoadI src) shift));
12202 effect(KILL cr);
12203
  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12205 ins_encode %{
12206 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12207 %}
12208 ins_pipe(ialu_reg);
12209 %}
12210
12211 // Logical Shift Right by 8-bit immediate
12212 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12213 %{
12214 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12215 effect(KILL cr);
12216
12217 format %{ "shrl $dst, $shift" %}
12218 ins_encode %{
12219 __ shrl($dst$$Address, $shift$$constant);
12220 %}
12221 ins_pipe(ialu_mem_imm);
12222 %}
12223
12224 // Logical Shift Right by variable
12225 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12226 %{
12227 predicate(!VM_Version::supports_bmi2());
12228 match(Set dst (URShiftI dst shift));
12229 effect(KILL cr);
12230
12231 format %{ "shrl $dst, $shift" %}
12232 ins_encode %{
12233 __ shrl($dst$$Register);
12234 %}
12235 ins_pipe(ialu_reg_reg);
12236 %}
12237
12238 // Logical Shift Right by variable
12239 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12240 %{
12241 predicate(!VM_Version::supports_bmi2());
12242 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12243 effect(KILL cr);
12244
12245 format %{ "shrl $dst, $shift" %}
12246 ins_encode %{
12247 __ shrl($dst$$Address);
12248 %}
12249 ins_pipe(ialu_mem_reg);
12250 %}
12251
12252 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12253 %{
12254 predicate(VM_Version::supports_bmi2());
12255 match(Set dst (URShiftI src shift));
12256
12257 format %{ "shrxl $dst, $src, $shift" %}
12258 ins_encode %{
12259 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12260 %}
12261 ins_pipe(ialu_reg_reg);
12262 %}
12263
12264 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12265 %{
12266 predicate(VM_Version::supports_bmi2());
12267 match(Set dst (URShiftI (LoadI src) shift));
12268 ins_cost(175);
12269 format %{ "shrxl $dst, $src, $shift" %}
12270 ins_encode %{
12271 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12272 %}
12273 ins_pipe(ialu_reg_mem);
12274 %}
12275
12276 // Long Shift Instructions
12277 // Shift Left by one, two, three
12278 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12279 %{
12280 predicate(!UseAPX);
12281 match(Set dst (LShiftL dst shift));
12282 effect(KILL cr);
12283
12284 format %{ "salq $dst, $shift" %}
12285 ins_encode %{
12286 __ salq($dst$$Register, $shift$$constant);
12287 %}
12288 ins_pipe(ialu_reg);
12289 %}
12290
12291 // Shift Left by one, two, three
12292 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12293 %{
12294 predicate(UseAPX);
12295 match(Set dst (LShiftL src shift));
12296 effect(KILL cr);
12297 flag(PD::Flag_ndd_demotable_opr1);
12298
12299 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12300 ins_encode %{
12301 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12302 %}
12303 ins_pipe(ialu_reg);
12304 %}
12305
12306 // Shift Left by 8-bit immediate
12307 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12308 %{
12309 predicate(!UseAPX);
12310 match(Set dst (LShiftL dst shift));
12311 effect(KILL cr);
12312
12313 format %{ "salq $dst, $shift" %}
12314 ins_encode %{
12315 __ salq($dst$$Register, $shift$$constant);
12316 %}
12317 ins_pipe(ialu_reg);
12318 %}
12319
12320 // Shift Left by 8-bit immediate
12321 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12322 %{
12323 predicate(UseAPX);
12324 match(Set dst (LShiftL src shift));
12325 effect(KILL cr);
12326 flag(PD::Flag_ndd_demotable_opr1);
12327
12328 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12329 ins_encode %{
12330 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12331 %}
12332 ins_pipe(ialu_reg);
12333 %}
12334
12335 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12336 %{
12337 predicate(UseAPX);
12338 match(Set dst (LShiftL (LoadL src) shift));
12339 effect(KILL cr);
12340
12341 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12342 ins_encode %{
12343 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12344 %}
12345 ins_pipe(ialu_reg);
12346 %}
12347
12348 // Shift Left by 8-bit immediate
12349 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12350 %{
12351 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12352 effect(KILL cr);
12353
12354 format %{ "salq $dst, $shift" %}
12355 ins_encode %{
12356 __ salq($dst$$Address, $shift$$constant);
12357 %}
12358 ins_pipe(ialu_mem_imm);
12359 %}
12360
12361 // Shift Left by variable
12362 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12363 %{
12364 predicate(!VM_Version::supports_bmi2());
12365 match(Set dst (LShiftL dst shift));
12366 effect(KILL cr);
12367
12368 format %{ "salq $dst, $shift" %}
12369 ins_encode %{
12370 __ salq($dst$$Register);
12371 %}
12372 ins_pipe(ialu_reg_reg);
12373 %}
12374
12375 // Shift Left by variable
12376 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12377 %{
12378 predicate(!VM_Version::supports_bmi2());
12379 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12380 effect(KILL cr);
12381
12382 format %{ "salq $dst, $shift" %}
12383 ins_encode %{
12384 __ salq($dst$$Address);
12385 %}
12386 ins_pipe(ialu_mem_reg);
12387 %}
12388
12389 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12390 %{
12391 predicate(VM_Version::supports_bmi2());
12392 match(Set dst (LShiftL src shift));
12393
12394 format %{ "shlxq $dst, $src, $shift" %}
12395 ins_encode %{
12396 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12397 %}
12398 ins_pipe(ialu_reg_reg);
12399 %}
12400
12401 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12402 %{
12403 predicate(VM_Version::supports_bmi2());
12404 match(Set dst (LShiftL (LoadL src) shift));
12405 ins_cost(175);
12406 format %{ "shlxq $dst, $src, $shift" %}
12407 ins_encode %{
12408 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12409 %}
12410 ins_pipe(ialu_reg_mem);
12411 %}
12412
12413 // Arithmetic Shift Right by 8-bit immediate
12414 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12415 %{
12416 predicate(!UseAPX);
12417 match(Set dst (RShiftL dst shift));
12418 effect(KILL cr);
12419
12420 format %{ "sarq $dst, $shift" %}
12421 ins_encode %{
12422 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12423 %}
12424 ins_pipe(ialu_mem_imm);
12425 %}
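// The '& 0x3F' mirrors the hardware behavior for 64-bit shifts, which use
// only the low six bits of the count; this also matches Java semantics, where
// 'x >> s' on a long shifts by s & 63. Clamping here keeps the emitted imm8
// in range for any immI constant.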
12426
12427 // Arithmetic Shift Right by 8-bit immediate
12428 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12429 %{
12430 predicate(UseAPX);
12431 match(Set dst (RShiftL src shift));
12432 effect(KILL cr);
12433 flag(PD::Flag_ndd_demotable_opr1);
12434
12435 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12436 ins_encode %{
12437 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12438 %}
12439 ins_pipe(ialu_mem_imm);
12440 %}
12441
12442 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12443 %{
12444 predicate(UseAPX);
12445 match(Set dst (RShiftL (LoadL src) shift));
12446 effect(KILL cr);
12447
12448 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12449 ins_encode %{
12450 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12451 %}
12452 ins_pipe(ialu_mem_imm);
12453 %}
12454
12455 // Arithmetic Shift Right by 8-bit immediate
12456 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12457 %{
12458 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12459 effect(KILL cr);
12460
12461 format %{ "sarq $dst, $shift" %}
12462 ins_encode %{
12463 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12464 %}
12465 ins_pipe(ialu_mem_imm);
12466 %}
12467
12468 // Arithmetic Shift Right by variable
12469 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12470 %{
12471 predicate(!VM_Version::supports_bmi2());
12472 match(Set dst (RShiftL dst shift));
12473 effect(KILL cr);
12474
12475 format %{ "sarq $dst, $shift" %}
12476 ins_encode %{
12477 __ sarq($dst$$Register);
12478 %}
12479 ins_pipe(ialu_reg_reg);
12480 %}
12481
12482 // Arithmetic Shift Right by variable
12483 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12484 %{
12485 predicate(!VM_Version::supports_bmi2());
12486 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12487 effect(KILL cr);
12488
12489 format %{ "sarq $dst, $shift" %}
12490 ins_encode %{
12491 __ sarq($dst$$Address);
12492 %}
12493 ins_pipe(ialu_mem_reg);
12494 %}
12495
12496 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12497 %{
12498 predicate(VM_Version::supports_bmi2());
12499 match(Set dst (RShiftL src shift));
12500
12501 format %{ "sarxq $dst, $src, $shift" %}
12502 ins_encode %{
12503 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12504 %}
12505 ins_pipe(ialu_reg_reg);
12506 %}
12507
12508 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12509 %{
12510 predicate(VM_Version::supports_bmi2());
12511 match(Set dst (RShiftL (LoadL src) shift));
12512 ins_cost(175);
12513 format %{ "sarxq $dst, $src, $shift" %}
12514 ins_encode %{
12515 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12516 %}
12517 ins_pipe(ialu_reg_mem);
12518 %}
12519
12520 // Logical Shift Right by 8-bit immediate
12521 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12522 %{
12523 predicate(!UseAPX);
12524 match(Set dst (URShiftL dst shift));
12525 effect(KILL cr);
12526
12527 format %{ "shrq $dst, $shift" %}
12528 ins_encode %{
12529 __ shrq($dst$$Register, $shift$$constant);
12530 %}
12531 ins_pipe(ialu_reg);
12532 %}
12533
12534 // Logical Shift Right by 8-bit immediate
12535 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12536 %{
12537 predicate(UseAPX);
12538 match(Set dst (URShiftL src shift));
12539 effect(KILL cr);
12540 flag(PD::Flag_ndd_demotable_opr1);
12541
12542 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12543 ins_encode %{
12544 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12545 %}
12546 ins_pipe(ialu_reg);
12547 %}
12548
12549 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12550 %{
12551 predicate(UseAPX);
12552 match(Set dst (URShiftL (LoadL src) shift));
12553 effect(KILL cr);
12554
12555 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12556 ins_encode %{
12557 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12558 %}
12559 ins_pipe(ialu_reg);
12560 %}
12561
12562 // Logical Shift Right by 8-bit immediate
12563 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12564 %{
12565 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12566 effect(KILL cr);
12567
12568 format %{ "shrq $dst, $shift" %}
12569 ins_encode %{
12570 __ shrq($dst$$Address, $shift$$constant);
12571 %}
12572 ins_pipe(ialu_mem_imm);
12573 %}
12574
12575 // Logical Shift Right by variable
12576 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12577 %{
12578 predicate(!VM_Version::supports_bmi2());
12579 match(Set dst (URShiftL dst shift));
12580 effect(KILL cr);
12581
12582 format %{ "shrq $dst, $shift" %}
12583 ins_encode %{
12584 __ shrq($dst$$Register);
12585 %}
12586 ins_pipe(ialu_reg_reg);
12587 %}
12588
12589 // Logical Shift Right by variable
12590 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12591 %{
12592 predicate(!VM_Version::supports_bmi2());
12593 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12594 effect(KILL cr);
12595
12596 format %{ "shrq $dst, $shift" %}
12597 ins_encode %{
12598 __ shrq($dst$$Address);
12599 %}
12600 ins_pipe(ialu_mem_reg);
12601 %}
12602
12603 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12604 %{
12605 predicate(VM_Version::supports_bmi2());
12606 match(Set dst (URShiftL src shift));
12607
12608 format %{ "shrxq $dst, $src, $shift" %}
12609 ins_encode %{
12610 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12611 %}
12612 ins_pipe(ialu_reg_reg);
12613 %}
12614
12615 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12616 %{
12617 predicate(VM_Version::supports_bmi2());
12618 match(Set dst (URShiftL (LoadL src) shift));
12619 ins_cost(175);
12620 format %{ "shrxq $dst, $src, $shift" %}
12621 ins_encode %{
12622 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12623 %}
12624 ins_pipe(ialu_reg_mem);
12625 %}
12626
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12629 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12630 %{
12631 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12632
12633 format %{ "movsbl $dst, $src\t# i2b" %}
12634 ins_encode %{
12635 __ movsbl($dst$$Register, $src$$Register);
12636 %}
12637 ins_pipe(ialu_reg_reg);
12638 %}
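// Worked example (illustrative): for src = 0x123456FF the matched tree
// computes (0x123456FF << 24) >> 24 = 0xFFFFFFFF, i.e. the sign extension of
// the low byte, which movsbl produces in a single instruction. The i2s form
// below does the same at 16 bits with movswl.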
12639
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12642 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12643 %{
12644 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12645
12646 format %{ "movswl $dst, $src\t# i2s" %}
12647 ins_encode %{
12648 __ movswl($dst$$Register, $src$$Register);
12649 %}
12650 ins_pipe(ialu_reg_reg);
12651 %}
12652
12653 // ROL/ROR instructions
12654
12655 // Rotate left by constant.
12656 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12657 %{
12658 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12659 match(Set dst (RotateLeft dst shift));
12660 effect(KILL cr);
12661 format %{ "roll $dst, $shift" %}
12662 ins_encode %{
12663 __ roll($dst$$Register, $shift$$constant);
12664 %}
12665 ins_pipe(ialu_reg);
12666 %}
12667
12668 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12669 %{
12670 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12671 match(Set dst (RotateLeft src shift));
12672 format %{ "rolxl $dst, $src, $shift" %}
12673 ins_encode %{
12674 int shift = 32 - ($shift$$constant & 31);
12675 __ rorxl($dst$$Register, $src$$Register, shift);
12676 %}
12677 ins_pipe(ialu_reg_reg);
12678 %}
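// BMI2 provides only a rotate-right-by-immediate (rorx), so the rotate left
// above is emitted as a rotate right by the complementary count:
// rol(x, s) == ror(x, (32 - s) & 31). For example (illustrative),
// rol(0x80000001, 1) == 0x00000003 == ror(0x80000001, 31).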
12679
12680 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12681 %{
12682 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12683 match(Set dst (RotateLeft (LoadI src) shift));
12684 ins_cost(175);
12685 format %{ "rolxl $dst, $src, $shift" %}
12686 ins_encode %{
12687 int shift = 32 - ($shift$$constant & 31);
12688 __ rorxl($dst$$Register, $src$$Address, shift);
12689 %}
12690 ins_pipe(ialu_reg_mem);
12691 %}
12692
12693 // Rotate Left by variable
12694 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12695 %{
12696 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12697 match(Set dst (RotateLeft dst shift));
12698 effect(KILL cr);
12699 format %{ "roll $dst, $shift" %}
12700 ins_encode %{
12701 __ roll($dst$$Register);
12702 %}
12703 ins_pipe(ialu_reg_reg);
12704 %}
12705
12706 // Rotate Left by variable
12707 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12708 %{
12709 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12710 match(Set dst (RotateLeft src shift));
12711 effect(KILL cr);
12712 flag(PD::Flag_ndd_demotable_opr1);
12713
12714 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12715 ins_encode %{
12716 __ eroll($dst$$Register, $src$$Register, false);
12717 %}
12718 ins_pipe(ialu_reg_reg);
12719 %}
12720
12721 // Rotate Right by constant.
12722 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12723 %{
12724 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12725 match(Set dst (RotateRight dst shift));
12726 effect(KILL cr);
12727 format %{ "rorl $dst, $shift" %}
12728 ins_encode %{
12729 __ rorl($dst$$Register, $shift$$constant);
12730 %}
12731 ins_pipe(ialu_reg);
12732 %}
12733
12734 // Rotate Right by constant.
12735 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12736 %{
12737 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12738 match(Set dst (RotateRight src shift));
12739 format %{ "rorxl $dst, $src, $shift" %}
12740 ins_encode %{
12741 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12742 %}
12743 ins_pipe(ialu_reg_reg);
12744 %}
12745
12746 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12747 %{
12748 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12749 match(Set dst (RotateRight (LoadI src) shift));
12750 ins_cost(175);
12751 format %{ "rorxl $dst, $src, $shift" %}
12752 ins_encode %{
12753 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12754 %}
12755 ins_pipe(ialu_reg_mem);
12756 %}
12757
12758 // Rotate Right by variable
12759 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12760 %{
12761 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12762 match(Set dst (RotateRight dst shift));
12763 effect(KILL cr);
12764 format %{ "rorl $dst, $shift" %}
12765 ins_encode %{
12766 __ rorl($dst$$Register);
12767 %}
12768 ins_pipe(ialu_reg_reg);
12769 %}
12770
12771 // Rotate Right by variable
12772 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12773 %{
12774 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12775 match(Set dst (RotateRight src shift));
12776 effect(KILL cr);
12777 flag(PD::Flag_ndd_demotable_opr1);
12778
  format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12780 ins_encode %{
12781 __ erorl($dst$$Register, $src$$Register, false);
12782 %}
12783 ins_pipe(ialu_reg_reg);
12784 %}
12785
12786 // Rotate Left by constant.
12787 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12788 %{
12789 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12790 match(Set dst (RotateLeft dst shift));
12791 effect(KILL cr);
12792 format %{ "rolq $dst, $shift" %}
12793 ins_encode %{
12794 __ rolq($dst$$Register, $shift$$constant);
12795 %}
12796 ins_pipe(ialu_reg);
12797 %}
12798
12799 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12800 %{
12801 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12802 match(Set dst (RotateLeft src shift));
12803 format %{ "rolxq $dst, $src, $shift" %}
12804 ins_encode %{
12805 int shift = 64 - ($shift$$constant & 63);
12806 __ rorxq($dst$$Register, $src$$Register, shift);
12807 %}
12808 ins_pipe(ialu_reg_reg);
12809 %}
12810
12811 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12812 %{
12813 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12814 match(Set dst (RotateLeft (LoadL src) shift));
12815 ins_cost(175);
12816 format %{ "rolxq $dst, $src, $shift" %}
12817 ins_encode %{
12818 int shift = 64 - ($shift$$constant & 63);
12819 __ rorxq($dst$$Register, $src$$Address, shift);
12820 %}
12821 ins_pipe(ialu_reg_mem);
12822 %}
12823
12824 // Rotate Left by variable
12825 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12826 %{
12827 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12828 match(Set dst (RotateLeft dst shift));
12829 effect(KILL cr);
12830
12831 format %{ "rolq $dst, $shift" %}
12832 ins_encode %{
12833 __ rolq($dst$$Register);
12834 %}
12835 ins_pipe(ialu_reg_reg);
12836 %}
12837
12838 // Rotate Left by variable
12839 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12840 %{
12841 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12842 match(Set dst (RotateLeft src shift));
12843 effect(KILL cr);
12844 flag(PD::Flag_ndd_demotable_opr1);
12845
  format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12847 ins_encode %{
12848 __ erolq($dst$$Register, $src$$Register, false);
12849 %}
12850 ins_pipe(ialu_reg_reg);
12851 %}
12852
12853 // Rotate Right by constant.
12854 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12855 %{
12856 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12857 match(Set dst (RotateRight dst shift));
12858 effect(KILL cr);
12859 format %{ "rorq $dst, $shift" %}
12860 ins_encode %{
12861 __ rorq($dst$$Register, $shift$$constant);
12862 %}
12863 ins_pipe(ialu_reg);
12864 %}
12865
12866 // Rotate Right by constant
12867 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12868 %{
12869 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12870 match(Set dst (RotateRight src shift));
12871 format %{ "rorxq $dst, $src, $shift" %}
12872 ins_encode %{
12873 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12874 %}
12875 ins_pipe(ialu_reg_reg);
12876 %}
12877
12878 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12879 %{
12880 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12881 match(Set dst (RotateRight (LoadL src) shift));
12882 ins_cost(175);
12883 format %{ "rorxq $dst, $src, $shift" %}
12884 ins_encode %{
12885 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12886 %}
12887 ins_pipe(ialu_reg_mem);
12888 %}
12889
12890 // Rotate Right by variable
12891 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12892 %{
12893 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12894 match(Set dst (RotateRight dst shift));
12895 effect(KILL cr);
12896 format %{ "rorq $dst, $shift" %}
12897 ins_encode %{
12898 __ rorq($dst$$Register);
12899 %}
12900 ins_pipe(ialu_reg_reg);
12901 %}
12902
12903 // Rotate Right by variable
12904 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12905 %{
12906 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12907 match(Set dst (RotateRight src shift));
12908 effect(KILL cr);
12909 flag(PD::Flag_ndd_demotable_opr1);
12910
  format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12912 ins_encode %{
12913 __ erorq($dst$$Register, $src$$Register, false);
12914 %}
12915 ins_pipe(ialu_reg_reg);
12916 %}
12917
12918 //----------------------------- CompressBits/ExpandBits ------------------------
12919
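// pext packs the src bits selected by mask into the low-order bits of the
// destination; pdep is the inverse, scattering the low-order src bits to the
// positions selected by mask. Worked example (illustrative):
//   pext(0b10101100, 0b01100101) == 0b0110      (bits 6, 5, 2, 0 of src)
//   pdep(0b0110,     0b01100101) == 0b00100100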
12920 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12921 predicate(n->bottom_type()->isa_long());
12922 match(Set dst (CompressBits src mask));
12923 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12924 ins_encode %{
12925 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12926 %}
12927 ins_pipe( pipe_slow );
12928 %}
12929
12930 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12931 predicate(n->bottom_type()->isa_long());
12932 match(Set dst (ExpandBits src mask));
12933 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12934 ins_encode %{
12935 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12936 %}
12937 ins_pipe( pipe_slow );
12938 %}
12939
12940 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12941 predicate(n->bottom_type()->isa_long());
12942 match(Set dst (CompressBits src (LoadL mask)));
12943 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12944 ins_encode %{
12945 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12946 %}
12947 ins_pipe( pipe_slow );
12948 %}
12949
12950 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12951 predicate(n->bottom_type()->isa_long());
12952 match(Set dst (ExpandBits src (LoadL mask)));
12953 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12954 ins_encode %{
12955 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12956 %}
12957 ins_pipe( pipe_slow );
12958 %}
12959
12960
12961 // Logical Instructions
12962
12963 // Integer Logical Instructions
12964
12965 // And Instructions
12966 // And Register with Register
12967 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12968 %{
12969 predicate(!UseAPX);
12970 match(Set dst (AndI dst src));
12971 effect(KILL cr);
12972 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12973
12974 format %{ "andl $dst, $src\t# int" %}
12975 ins_encode %{
12976 __ andl($dst$$Register, $src$$Register);
12977 %}
12978 ins_pipe(ialu_reg_reg);
12979 %}
12980
12981 // And Register with Register using New Data Destination (NDD)
12982 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12983 %{
12984 predicate(UseAPX);
12985 match(Set dst (AndI src1 src2));
12986 effect(KILL cr);
12987 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12988
12989 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12990 ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12994 ins_pipe(ialu_reg_reg);
12995 %}
12996
12997 // And Register with Immediate 255
12998 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12999 %{
13000 match(Set dst (AndI src mask));
13001
13002 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
13003 ins_encode %{
13004 __ movzbl($dst$$Register, $src$$Register);
13005 %}
13006 ins_pipe(ialu_reg);
13007 %}
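// This and the following movzbl/movzwl masking forms need no KILL cr: unlike
// andl, the zero-extending moves do not modify EFLAGS.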
13008
13009 // And Register with Immediate 255 and promote to long
13010 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13011 %{
13012 match(Set dst (ConvI2L (AndI src mask)));
13013
13014 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
13015 ins_encode %{
13016 __ movzbl($dst$$Register, $src$$Register);
13017 %}
13018 ins_pipe(ialu_reg);
13019 %}
13020
13021 // And Register with Immediate 65535
13022 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13023 %{
13024 match(Set dst (AndI src mask));
13025
13026 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13027 ins_encode %{
13028 __ movzwl($dst$$Register, $src$$Register);
13029 %}
13030 ins_pipe(ialu_reg);
13031 %}
13032
13033 // And Register with Immediate 65535 and promote to long
13034 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13035 %{
13036 match(Set dst (ConvI2L (AndI src mask)));
13037
13038 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13039 ins_encode %{
13040 __ movzwl($dst$$Register, $src$$Register);
13041 %}
13042 ins_pipe(ialu_reg);
13043 %}
13044
13045 // Can skip int2long conversions after AND with small bitmask
13046 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13047 %{
13048 predicate(VM_Version::supports_bmi2());
13049 ins_cost(125);
13050 effect(TEMP tmp, KILL cr);
13051 match(Set dst (ConvI2L (AndI src mask)));
13052 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13053 ins_encode %{
13054 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13055 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13056 %}
13057 ins_pipe(ialu_reg_reg);
13058 %}
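// bzhiq keeps the low $tmp bits of $src and zeroes the rest. Since
// mask == 2^k - 1, loading k = log2(mask + 1) into $tmp yields src & mask,
// and the 64-bit result already has a clear upper half, so no separate
// int-to-long conversion is needed. E.g. (illustrative) mask = 0x7FF gives
// k = 11.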
13059
13060 // And Register with Immediate
13061 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13062 %{
13063 predicate(!UseAPX);
13064 match(Set dst (AndI dst src));
13065 effect(KILL cr);
13066 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13067
13068 format %{ "andl $dst, $src\t# int" %}
13069 ins_encode %{
13070 __ andl($dst$$Register, $src$$constant);
13071 %}
13072 ins_pipe(ialu_reg);
13073 %}
13074
13075 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13076 %{
13077 predicate(UseAPX);
13078 match(Set dst (AndI src1 src2));
13079 effect(KILL cr);
13080 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13081
13082 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13083 ins_encode %{
13084 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13085 %}
13086 ins_pipe(ialu_reg);
13087 %}
13088
13089 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13090 %{
13091 predicate(UseAPX);
13092 match(Set dst (AndI (LoadI src1) src2));
13093 effect(KILL cr);
13094 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13095
13096 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13097 ins_encode %{
13098 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13099 %}
13100 ins_pipe(ialu_reg);
13101 %}
13102
13103 // And Register with Memory
13104 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13105 %{
13106 predicate(!UseAPX);
13107 match(Set dst (AndI dst (LoadI src)));
13108 effect(KILL cr);
13109 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13110
13111 ins_cost(150);
13112 format %{ "andl $dst, $src\t# int" %}
13113 ins_encode %{
13114 __ andl($dst$$Register, $src$$Address);
13115 %}
13116 ins_pipe(ialu_reg_mem);
13117 %}
13118
13119 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13120 %{
13121 predicate(UseAPX);
13122 match(Set dst (AndI src1 (LoadI src2)));
13123 effect(KILL cr);
13124 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13125
13126 ins_cost(150);
13127 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13128 ins_encode %{
13129 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13130 %}
13131 ins_pipe(ialu_reg_mem);
13132 %}
13133
13134 // And Memory with Register
13135 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13136 %{
13137 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13138 effect(KILL cr);
13139 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13140
13141 ins_cost(150);
13142 format %{ "andb $dst, $src\t# byte" %}
13143 ins_encode %{
13144 __ andb($dst$$Address, $src$$Register);
13145 %}
13146 ins_pipe(ialu_mem_reg);
13147 %}
13148
13149 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13150 %{
13151 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13152 effect(KILL cr);
13153 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13154
13155 ins_cost(150);
13156 format %{ "andl $dst, $src\t# int" %}
13157 ins_encode %{
13158 __ andl($dst$$Address, $src$$Register);
13159 %}
13160 ins_pipe(ialu_mem_reg);
13161 %}
13162
13163 // And Memory with Immediate
13164 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13165 %{
13166 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13167 effect(KILL cr);
13168 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13169
13170 ins_cost(125);
13171 format %{ "andl $dst, $src\t# int" %}
13172 ins_encode %{
13173 __ andl($dst$$Address, $src$$constant);
13174 %}
13175 ins_pipe(ialu_mem_imm);
13176 %}
13177
13178 // BMI1 instructions
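// The patterns below recognize the standard bit-twiddling identities that the
// BMI1 instructions implement (worked on an illustrative value):
//   andn:   ~x & y
//   blsi:    x & -x       extract lowest set bit:  0b10110100 -> 0b00000100
//   blsmsk:  x ^ (x - 1)  mask through lowest bit: 0b10110100 -> 0b00000111
//   blsr:    x & (x - 1)  clear lowest set bit:    0b10110100 -> 0b10110000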
13179 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13180 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13181 predicate(UseBMI1Instructions);
13182 effect(KILL cr);
13183 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13184
13185 ins_cost(125);
13186 format %{ "andnl $dst, $src1, $src2" %}
13187
13188 ins_encode %{
13189 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13190 %}
13191 ins_pipe(ialu_reg_mem);
13192 %}
13193
13194 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13195 match(Set dst (AndI (XorI src1 minus_1) src2));
13196 predicate(UseBMI1Instructions);
13197 effect(KILL cr);
13198 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13199
13200 format %{ "andnl $dst, $src1, $src2" %}
13201
13202 ins_encode %{
13203 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13204 %}
13205 ins_pipe(ialu_reg);
13206 %}
13207
13208 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13209 match(Set dst (AndI (SubI imm_zero src) src));
13210 predicate(UseBMI1Instructions);
13211 effect(KILL cr);
13212 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13213
13214 format %{ "blsil $dst, $src" %}
13215
13216 ins_encode %{
13217 __ blsil($dst$$Register, $src$$Register);
13218 %}
13219 ins_pipe(ialu_reg);
13220 %}
13221
13222 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13223 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13224 predicate(UseBMI1Instructions);
13225 effect(KILL cr);
13226 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13227
13228 ins_cost(125);
13229 format %{ "blsil $dst, $src" %}
13230
13231 ins_encode %{
13232 __ blsil($dst$$Register, $src$$Address);
13233 %}
13234 ins_pipe(ialu_reg_mem);
13235 %}
13236
13237 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13238 %{
13239 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13240 predicate(UseBMI1Instructions);
13241 effect(KILL cr);
13242 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13243
13244 ins_cost(125);
13245 format %{ "blsmskl $dst, $src" %}
13246
13247 ins_encode %{
13248 __ blsmskl($dst$$Register, $src$$Address);
13249 %}
13250 ins_pipe(ialu_reg_mem);
13251 %}
13252
13253 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13254 %{
13255 match(Set dst (XorI (AddI src minus_1) src));
13256 predicate(UseBMI1Instructions);
13257 effect(KILL cr);
13258 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13259
13260 format %{ "blsmskl $dst, $src" %}
13261
13262 ins_encode %{
13263 __ blsmskl($dst$$Register, $src$$Register);
13264 %}
13265
13266 ins_pipe(ialu_reg);
13267 %}
13268
13269 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13270 %{
13271 match(Set dst (AndI (AddI src minus_1) src) );
13272 predicate(UseBMI1Instructions);
13273 effect(KILL cr);
13274 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13275
13276 format %{ "blsrl $dst, $src" %}
13277
13278 ins_encode %{
13279 __ blsrl($dst$$Register, $src$$Register);
13280 %}
13281
  ins_pipe(ialu_reg);
13283 %}
13284
13285 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13286 %{
13287 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13288 predicate(UseBMI1Instructions);
13289 effect(KILL cr);
13290 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13291
13292 ins_cost(125);
13293 format %{ "blsrl $dst, $src" %}
13294
13295 ins_encode %{
13296 __ blsrl($dst$$Register, $src$$Address);
13297 %}
13298
  ins_pipe(ialu_reg_mem);
13300 %}
13301
13302 // Or Instructions
13303 // Or Register with Register
13304 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13305 %{
13306 predicate(!UseAPX);
13307 match(Set dst (OrI dst src));
13308 effect(KILL cr);
13309 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13310
13311 format %{ "orl $dst, $src\t# int" %}
13312 ins_encode %{
13313 __ orl($dst$$Register, $src$$Register);
13314 %}
13315 ins_pipe(ialu_reg_reg);
13316 %}
13317
13318 // Or Register with Register using New Data Destination (NDD)
13319 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13320 %{
13321 predicate(UseAPX);
13322 match(Set dst (OrI src1 src2));
13323 effect(KILL cr);
13324 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13325
13326 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13327 ins_encode %{
13328 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13329 %}
13330 ins_pipe(ialu_reg_reg);
13331 %}
13332
13333 // Or Register with Immediate
13334 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13335 %{
13336 predicate(!UseAPX);
13337 match(Set dst (OrI dst src));
13338 effect(KILL cr);
13339 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13340
13341 format %{ "orl $dst, $src\t# int" %}
13342 ins_encode %{
13343 __ orl($dst$$Register, $src$$constant);
13344 %}
13345 ins_pipe(ialu_reg);
13346 %}
13347
13348 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13349 %{
13350 predicate(UseAPX);
13351 match(Set dst (OrI src1 src2));
13352 effect(KILL cr);
13353 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13354
13355 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13356 ins_encode %{
13357 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13358 %}
13359 ins_pipe(ialu_reg);
13360 %}
13361
13362 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13363 %{
13364 predicate(UseAPX);
13365 match(Set dst (OrI src1 src2));
13366 effect(KILL cr);
13367 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13368
13369 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13370 ins_encode %{
13371 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13372 %}
13373 ins_pipe(ialu_reg);
13374 %}
13375
13376 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13377 %{
13378 predicate(UseAPX);
13379 match(Set dst (OrI (LoadI src1) src2));
13380 effect(KILL cr);
13381 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13382
13383 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13384 ins_encode %{
13385 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13386 %}
13387 ins_pipe(ialu_reg);
13388 %}
13389
13390 // Or Register with Memory
13391 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13392 %{
13393 predicate(!UseAPX);
13394 match(Set dst (OrI dst (LoadI src)));
13395 effect(KILL cr);
13396 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13397
13398 ins_cost(150);
13399 format %{ "orl $dst, $src\t# int" %}
13400 ins_encode %{
13401 __ orl($dst$$Register, $src$$Address);
13402 %}
13403 ins_pipe(ialu_reg_mem);
13404 %}
13405
13406 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13407 %{
13408 predicate(UseAPX);
13409 match(Set dst (OrI src1 (LoadI src2)));
13410 effect(KILL cr);
13411 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13412
13413 ins_cost(150);
13414 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13415 ins_encode %{
13416 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13417 %}
13418 ins_pipe(ialu_reg_mem);
13419 %}
13420
13421 // Or Memory with Register
13422 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13423 %{
13424 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13425 effect(KILL cr);
13426 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13427
13428 ins_cost(150);
13429 format %{ "orb $dst, $src\t# byte" %}
13430 ins_encode %{
13431 __ orb($dst$$Address, $src$$Register);
13432 %}
13433 ins_pipe(ialu_mem_reg);
13434 %}
13435
13436 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13437 %{
13438 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13439 effect(KILL cr);
13440 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13441
13442 ins_cost(150);
13443 format %{ "orl $dst, $src\t# int" %}
13444 ins_encode %{
13445 __ orl($dst$$Address, $src$$Register);
13446 %}
13447 ins_pipe(ialu_mem_reg);
13448 %}
13449
13450 // Or Memory with Immediate
13451 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13452 %{
13453 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13454 effect(KILL cr);
13455 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13456
13457 ins_cost(125);
13458 format %{ "orl $dst, $src\t# int" %}
13459 ins_encode %{
13460 __ orl($dst$$Address, $src$$constant);
13461 %}
13462 ins_pipe(ialu_mem_imm);
13463 %}
13464
13465 // Xor Instructions
13466 // Xor Register with Register
13467 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13468 %{
13469 predicate(!UseAPX);
13470 match(Set dst (XorI dst src));
13471 effect(KILL cr);
13472 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13473
13474 format %{ "xorl $dst, $src\t# int" %}
13475 ins_encode %{
13476 __ xorl($dst$$Register, $src$$Register);
13477 %}
13478 ins_pipe(ialu_reg_reg);
13479 %}
13480
13481 // Xor Register with Register using New Data Destination (NDD)
13482 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13483 %{
13484 predicate(UseAPX);
13485 match(Set dst (XorI src1 src2));
13486 effect(KILL cr);
13487 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13488
13489 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13490 ins_encode %{
13491 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13492 %}
13493 ins_pipe(ialu_reg_reg);
13494 %}
13495
13496 // Xor Register with Immediate -1
13497 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13498 %{
13499 predicate(!UseAPX);
13500 match(Set dst (XorI dst imm));
13501
13502 format %{ "notl $dst" %}
13503 ins_encode %{
13504 __ notl($dst$$Register);
13505 %}
13506 ins_pipe(ialu_reg);
13507 %}
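// XOR with -1 is a bitwise complement, so it is emitted as the one-operand
// notl (enotl in the NDD form below); unlike xorl, NOT leaves EFLAGS
// unchanged, which is why these patterns have no KILL cr effect.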
13508
13509 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13510 %{
13511 match(Set dst (XorI src imm));
13512 predicate(UseAPX);
13513 flag(PD::Flag_ndd_demotable_opr1);
13514
13515 format %{ "enotl $dst, $src" %}
13516 ins_encode %{
13517 __ enotl($dst$$Register, $src$$Register);
13518 %}
13519 ins_pipe(ialu_reg);
13520 %}
13521
13522 // Xor Register with Immediate
13523 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13524 %{
  // Strict predicate check so that xorI_rReg_im1 is selected, regardless of cost, when immI src is -1.
13526 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13527 match(Set dst (XorI dst src));
13528 effect(KILL cr);
13529 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13530
13531 format %{ "xorl $dst, $src\t# int" %}
13532 ins_encode %{
13533 __ xorl($dst$$Register, $src$$constant);
13534 %}
13535 ins_pipe(ialu_reg);
13536 %}
13537
13538 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13539 %{
  // Strict predicate check so that xorI_rReg_im1_ndd is selected, regardless of cost, when immI src2 is -1.
13541 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13542 match(Set dst (XorI src1 src2));
13543 effect(KILL cr);
13544 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13545
13546 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13547 ins_encode %{
13548 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13549 %}
13550 ins_pipe(ialu_reg);
13551 %}
13552
13553 // Xor Memory with Immediate
13554 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13555 %{
13556 predicate(UseAPX);
13557 match(Set dst (XorI (LoadI src1) src2));
13558 effect(KILL cr);
13559 ins_cost(150);
13560 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13561
13562 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13563 ins_encode %{
13564 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13565 %}
13566 ins_pipe(ialu_reg);
13567 %}
13568
13569 // Xor Register with Memory
13570 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13571 %{
13572 predicate(!UseAPX);
13573 match(Set dst (XorI dst (LoadI src)));
13574 effect(KILL cr);
13575 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13576
13577 ins_cost(150);
13578 format %{ "xorl $dst, $src\t# int" %}
13579 ins_encode %{
13580 __ xorl($dst$$Register, $src$$Address);
13581 %}
13582 ins_pipe(ialu_reg_mem);
13583 %}
13584
13585 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13586 %{
13587 predicate(UseAPX);
13588 match(Set dst (XorI src1 (LoadI src2)));
13589 effect(KILL cr);
13590 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13591
13592 ins_cost(150);
13593 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13594 ins_encode %{
13595 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13596 %}
13597 ins_pipe(ialu_reg_mem);
13598 %}
13599
13600 // Xor Memory with Register
13601 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13602 %{
13603 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13604 effect(KILL cr);
13605 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13606
13607 ins_cost(150);
13608 format %{ "xorb $dst, $src\t# byte" %}
13609 ins_encode %{
13610 __ xorb($dst$$Address, $src$$Register);
13611 %}
13612 ins_pipe(ialu_mem_reg);
13613 %}
13614
13615 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13616 %{
13617 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13618 effect(KILL cr);
13619 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13620
13621 ins_cost(150);
13622 format %{ "xorl $dst, $src\t# int" %}
13623 ins_encode %{
13624 __ xorl($dst$$Address, $src$$Register);
13625 %}
13626 ins_pipe(ialu_mem_reg);
13627 %}
13628
13629 // Xor Memory with Immediate
13630 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13631 %{
13632 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13633 effect(KILL cr);
13634 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13635
13636 ins_cost(125);
13637 format %{ "xorl $dst, $src\t# int" %}
13638 ins_encode %{
13639 __ xorl($dst$$Address, $src$$constant);
13640 %}
13641 ins_pipe(ialu_mem_imm);
13642 %}
13643
13644
13645 // Long Logical Instructions
13646
13647 // And Instructions
13648 // And Register with Register
13649 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13650 %{
13651 predicate(!UseAPX);
13652 match(Set dst (AndL dst src));
13653 effect(KILL cr);
13654 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13655
13656 format %{ "andq $dst, $src\t# long" %}
13657 ins_encode %{
13658 __ andq($dst$$Register, $src$$Register);
13659 %}
13660 ins_pipe(ialu_reg_reg);
13661 %}
13662
13663 // And Register with Register using New Data Destination (NDD)
13664 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13665 %{
13666 predicate(UseAPX);
13667 match(Set dst (AndL src1 src2));
13668 effect(KILL cr);
13669 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13670
13671 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13672 ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13676 ins_pipe(ialu_reg_reg);
13677 %}
13678
13679 // And Register with Immediate 255
13680 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13681 %{
13682 match(Set dst (AndL src mask));
13683
13684 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13685 ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13687 __ movzbl($dst$$Register, $src$$Register);
13688 %}
13689 ins_pipe(ialu_reg);
13690 %}
13691
13692 // And Register with Immediate 65535
13693 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13694 %{
13695 match(Set dst (AndL src mask));
13696
13697 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13698 ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13700 __ movzwl($dst$$Register, $src$$Register);
13701 %}
13702 ins_pipe(ialu_reg);
13703 %}
13704
13705 // And Register with Immediate
13706 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13707 %{
13708 predicate(!UseAPX);
13709 match(Set dst (AndL dst src));
13710 effect(KILL cr);
13711 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13712
13713 format %{ "andq $dst, $src\t# long" %}
13714 ins_encode %{
13715 __ andq($dst$$Register, $src$$constant);
13716 %}
13717 ins_pipe(ialu_reg);
13718 %}
13719
13720 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13721 %{
13722 predicate(UseAPX);
13723 match(Set dst (AndL src1 src2));
13724 effect(KILL cr);
13725 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13726
13727 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13728 ins_encode %{
13729 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13730 %}
13731 ins_pipe(ialu_reg);
13732 %}
13733
13734 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13735 %{
13736 predicate(UseAPX);
13737 match(Set dst (AndL (LoadL src1) src2));
13738 effect(KILL cr);
13739 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13740
13741 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13742 ins_encode %{
13743 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13744 %}
13745 ins_pipe(ialu_reg);
13746 %}
13747
13748 // And Register with Memory
13749 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13750 %{
13751 predicate(!UseAPX);
13752 match(Set dst (AndL dst (LoadL src)));
13753 effect(KILL cr);
13754 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13755
13756 ins_cost(150);
13757 format %{ "andq $dst, $src\t# long" %}
13758 ins_encode %{
13759 __ andq($dst$$Register, $src$$Address);
13760 %}
13761 ins_pipe(ialu_reg_mem);
13762 %}
13763
13764 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13765 %{
13766 predicate(UseAPX);
13767 match(Set dst (AndL src1 (LoadL src2)));
13768 effect(KILL cr);
13769 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13770
13771 ins_cost(150);
13772 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13773 ins_encode %{
13774 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13775 %}
13776 ins_pipe(ialu_reg_mem);
13777 %}
13778
13779 // And Memory with Register
13780 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13781 %{
13782 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13783 effect(KILL cr);
13784 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13785
13786 ins_cost(150);
13787 format %{ "andq $dst, $src\t# long" %}
13788 ins_encode %{
13789 __ andq($dst$$Address, $src$$Register);
13790 %}
13791 ins_pipe(ialu_mem_reg);
13792 %}
13793
13794 // And Memory with Immediate
13795 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13796 %{
13797 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13798 effect(KILL cr);
13799 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13800
13801 ins_cost(125);
13802 format %{ "andq $dst, $src\t# long" %}
13803 ins_encode %{
13804 __ andq($dst$$Address, $src$$constant);
13805 %}
13806 ins_pipe(ialu_mem_imm);
13807 %}
13808
13809 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13810 %{
13811 // con should be a pure 64-bit immediate given that not(con) is a power of 2
13812 // because AND/OR works well enough for 8/32-bit values.
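  // e.g. con == ~(1L << 40) clears bit 40 in place with "btrq $dst, 40".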
13813 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13814
13815 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13816 effect(KILL cr);
13817
13818 ins_cost(125);
13819 format %{ "btrq $dst, log2(not($con))\t# long" %}
13820 ins_encode %{
13821 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13822 %}
13823 ins_pipe(ialu_mem_imm);
13824 %}
13825
13826 // BMI1 instructions
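// The match rules below are the canonical ideal-graph shapes of the BMI1 ops:
//   andn:   dst = ~src1 & src2
//   blsi:   dst = src & -src        (isolate lowest set bit)
//   blsmsk: dst = src ^ (src - 1)   (mask up to and including lowest set bit)
//   blsr:   dst = src & (src - 1)   (clear lowest set bit)
// e.g. src == 0b10100: blsi -> 0b00100, blsmsk -> 0b00111, blsr -> 0b10000.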
13827 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13828 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13829 predicate(UseBMI1Instructions);
13830 effect(KILL cr);
13831 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13832
13833 ins_cost(125);
13834 format %{ "andnq $dst, $src1, $src2" %}
13835
13836 ins_encode %{
13837 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13838 %}
13839 ins_pipe(ialu_reg_mem);
13840 %}
13841
13842 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13843 match(Set dst (AndL (XorL src1 minus_1) src2));
13844 predicate(UseBMI1Instructions);
13845 effect(KILL cr);
13846 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13847
13848 format %{ "andnq $dst, $src1, $src2" %}
13849
13850 ins_encode %{
13851 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13852 %}
13853 ins_pipe(ialu_reg_mem);
13854 %}
13855
13856 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13857 match(Set dst (AndL (SubL imm_zero src) src));
13858 predicate(UseBMI1Instructions);
13859 effect(KILL cr);
13860 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13861
13862 format %{ "blsiq $dst, $src" %}
13863
13864 ins_encode %{
13865 __ blsiq($dst$$Register, $src$$Register);
13866 %}
13867 ins_pipe(ialu_reg);
13868 %}
13869
13870 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13871 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13872 predicate(UseBMI1Instructions);
13873 effect(KILL cr);
13874 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13875
13876 ins_cost(125);
13877 format %{ "blsiq $dst, $src" %}
13878
13879 ins_encode %{
13880 __ blsiq($dst$$Register, $src$$Address);
13881 %}
13882 ins_pipe(ialu_reg_mem);
13883 %}
13884
13885 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13886 %{
13887 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13888 predicate(UseBMI1Instructions);
13889 effect(KILL cr);
13890 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13891
13892 ins_cost(125);
13893 format %{ "blsmskq $dst, $src" %}
13894
13895 ins_encode %{
13896 __ blsmskq($dst$$Register, $src$$Address);
13897 %}
13898 ins_pipe(ialu_reg_mem);
13899 %}
13900
13901 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13902 %{
13903 match(Set dst (XorL (AddL src minus_1) src));
13904 predicate(UseBMI1Instructions);
13905 effect(KILL cr);
13906 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13907
13908 format %{ "blsmskq $dst, $src" %}
13909
13910 ins_encode %{
13911 __ blsmskq($dst$$Register, $src$$Register);
13912 %}
13913
13914 ins_pipe(ialu_reg);
13915 %}
13916
13917 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13918 %{
13919 match(Set dst (AndL (AddL src minus_1) src) );
13920 predicate(UseBMI1Instructions);
13921 effect(KILL cr);
13922 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13923
13924 format %{ "blsrq $dst, $src" %}
13925
13926 ins_encode %{
13927 __ blsrq($dst$$Register, $src$$Register);
13928 %}
13929
13930 ins_pipe(ialu_reg);
13931 %}
13932
13933 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13934 %{
13935 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13936 predicate(UseBMI1Instructions);
13937 effect(KILL cr);
13938 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13939
13940 ins_cost(125);
13941 format %{ "blsrq $dst, $src" %}
13942
13943 ins_encode %{
13944 __ blsrq($dst$$Register, $src$$Address);
13945 %}
13946
13947 ins_pipe(ialu_reg);
13948 %}
13949
13950 // Or Instructions
13951 // Or Register with Register
13952 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13953 %{
13954 predicate(!UseAPX);
13955 match(Set dst (OrL dst src));
13956 effect(KILL cr);
13957 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13958
13959 format %{ "orq $dst, $src\t# long" %}
13960 ins_encode %{
13961 __ orq($dst$$Register, $src$$Register);
13962 %}
13963 ins_pipe(ialu_reg_reg);
13964 %}
13965
13966 // Or Register with Register using New Data Destination (NDD)
13967 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13968 %{
13969 predicate(UseAPX);
13970 match(Set dst (OrL src1 src2));
13971 effect(KILL cr);
13972 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13973
13974 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13975 ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13979 ins_pipe(ialu_reg_reg);
13980 %}
13981
13982 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);
13986 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13987
13988 format %{ "orq $dst, $src\t# long" %}
13989 ins_encode %{
13990 __ orq($dst$$Register, $src$$Register);
13991 %}
13992 ins_pipe(ialu_reg_reg);
13993 %}
13994
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
13998 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13999
14000 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14001 ins_encode %{
14002 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14003 %}
14004 ins_pipe(ialu_reg_reg);
14005 %}
14006
14007 // Or Register with Immediate
14008 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14009 %{
14010 predicate(!UseAPX);
14011 match(Set dst (OrL dst src));
14012 effect(KILL cr);
14013 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14014
14015 format %{ "orq $dst, $src\t# long" %}
14016 ins_encode %{
14017 __ orq($dst$$Register, $src$$constant);
14018 %}
14019 ins_pipe(ialu_reg);
14020 %}
14021
14022 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14023 %{
14024 predicate(UseAPX);
14025 match(Set dst (OrL src1 src2));
14026 effect(KILL cr);
14027 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14028
14029 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14030 ins_encode %{
14031 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14032 %}
14033 ins_pipe(ialu_reg);
14034 %}
14035
14036 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14037 %{
14038 predicate(UseAPX);
14039 match(Set dst (OrL src1 src2));
14040 effect(KILL cr);
14041 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14042
14043 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14044 ins_encode %{
14045 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14046 %}
14047 ins_pipe(ialu_reg);
14048 %}
14049
// Or Memory with Immediate into Register
14051 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14052 %{
14053 predicate(UseAPX);
14054 match(Set dst (OrL (LoadL src1) src2));
14055 effect(KILL cr);
14056 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14057
14058 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14059 ins_encode %{
14060 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14061 %}
14062 ins_pipe(ialu_reg);
14063 %}
14064
14065 // Or Register with Memory
14066 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14067 %{
14068 predicate(!UseAPX);
14069 match(Set dst (OrL dst (LoadL src)));
14070 effect(KILL cr);
14071 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14072
14073 ins_cost(150);
14074 format %{ "orq $dst, $src\t# long" %}
14075 ins_encode %{
14076 __ orq($dst$$Register, $src$$Address);
14077 %}
14078 ins_pipe(ialu_reg_mem);
14079 %}
14080
14081 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14082 %{
14083 predicate(UseAPX);
14084 match(Set dst (OrL src1 (LoadL src2)));
14085 effect(KILL cr);
14086 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14087
14088 ins_cost(150);
14089 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14090 ins_encode %{
14091 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14092 %}
14093 ins_pipe(ialu_reg_mem);
14094 %}
14095
14096 // Or Memory with Register
14097 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14098 %{
14099 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14100 effect(KILL cr);
14101 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14102
14103 ins_cost(150);
14104 format %{ "orq $dst, $src\t# long" %}
14105 ins_encode %{
14106 __ orq($dst$$Address, $src$$Register);
14107 %}
14108 ins_pipe(ialu_mem_reg);
14109 %}
14110
14111 // Or Memory with Immediate
14112 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14113 %{
14114 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14115 effect(KILL cr);
14116 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14117
14118 ins_cost(125);
14119 format %{ "orq $dst, $src\t# long" %}
14120 ins_encode %{
14121 __ orq($dst$$Address, $src$$constant);
14122 %}
14123 ins_pipe(ialu_mem_imm);
14124 %}
14125
14126 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14127 %{
14128 // con should be a pure 64-bit power of 2 immediate
14129 // because AND/OR works well enough for 8/32-bit values.
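  // e.g. con == (1L << 40) sets bit 40 in place with "btsq $dst, 40".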
14130 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14131
14132 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14133 effect(KILL cr);
14134
14135 ins_cost(125);
14136 format %{ "btsq $dst, log2($con)\t# long" %}
14137 ins_encode %{
14138 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14139 %}
14140 ins_pipe(ialu_mem_imm);
14141 %}
14142
14143 // Xor Instructions
14144 // Xor Register with Register
14145 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14146 %{
14147 predicate(!UseAPX);
14148 match(Set dst (XorL dst src));
14149 effect(KILL cr);
14150 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14151
14152 format %{ "xorq $dst, $src\t# long" %}
14153 ins_encode %{
14154 __ xorq($dst$$Register, $src$$Register);
14155 %}
14156 ins_pipe(ialu_reg_reg);
14157 %}
14158
14159 // Xor Register with Register using New Data Destination (NDD)
14160 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14161 %{
14162 predicate(UseAPX);
14163 match(Set dst (XorL src1 src2));
14164 effect(KILL cr);
14165 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14166
14167 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14168 ins_encode %{
14169 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14170 %}
14171 ins_pipe(ialu_reg_reg);
14172 %}
14173
14174 // Xor Register with Immediate -1
14175 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14176 %{
14177 predicate(!UseAPX);
14178 match(Set dst (XorL dst imm));
14179
14180 format %{ "notq $dst" %}
14181 ins_encode %{
14182 __ notq($dst$$Register);
14183 %}
14184 ins_pipe(ialu_reg);
14185 %}
14186
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14188 %{
14189 predicate(UseAPX);
14190 match(Set dst (XorL src imm));
14191 flag(PD::Flag_ndd_demotable_opr1);
14192
14193 format %{ "enotq $dst, $src" %}
14194 ins_encode %{
14195 __ enotq($dst$$Register, $src$$Register);
14196 %}
14197 ins_pipe(ialu_reg);
14198 %}
14199
14200 // Xor Register with Immediate
14201 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14202 %{
  // Strict predicate check so that xorL_rReg_im1 is selected for a -1 immediate regardless of cost.
14204 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14205 match(Set dst (XorL dst src));
14206 effect(KILL cr);
14207 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14208
14209 format %{ "xorq $dst, $src\t# long" %}
14210 ins_encode %{
14211 __ xorq($dst$$Register, $src$$constant);
14212 %}
14213 ins_pipe(ialu_reg);
14214 %}
14215
14216 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14217 %{
  // Strict predicate check so that xorL_rReg_im1_ndd is selected for a -1 immediate regardless of cost.
14219 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14220 match(Set dst (XorL src1 src2));
14221 effect(KILL cr);
14222 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14223
14224 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14225 ins_encode %{
14226 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14227 %}
14228 ins_pipe(ialu_reg);
14229 %}
14230
// Xor Memory with Immediate into Register
14232 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14233 %{
14234 predicate(UseAPX);
14235 match(Set dst (XorL (LoadL src1) src2));
14236 effect(KILL cr);
14237 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14238 ins_cost(150);
14239
14240 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14241 ins_encode %{
14242 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14243 %}
14244 ins_pipe(ialu_reg);
14245 %}
14246
14247 // Xor Register with Memory
14248 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14249 %{
14250 predicate(!UseAPX);
14251 match(Set dst (XorL dst (LoadL src)));
14252 effect(KILL cr);
14253 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14254
14255 ins_cost(150);
14256 format %{ "xorq $dst, $src\t# long" %}
14257 ins_encode %{
14258 __ xorq($dst$$Register, $src$$Address);
14259 %}
14260 ins_pipe(ialu_reg_mem);
14261 %}
14262
14263 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14264 %{
14265 predicate(UseAPX);
14266 match(Set dst (XorL src1 (LoadL src2)));
14267 effect(KILL cr);
14268 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14269
14270 ins_cost(150);
14271 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14272 ins_encode %{
14273 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14274 %}
14275 ins_pipe(ialu_reg_mem);
14276 %}
14277
14278 // Xor Memory with Register
14279 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14280 %{
14281 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14282 effect(KILL cr);
14283 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14284
14285 ins_cost(150);
14286 format %{ "xorq $dst, $src\t# long" %}
14287 ins_encode %{
14288 __ xorq($dst$$Address, $src$$Register);
14289 %}
14290 ins_pipe(ialu_mem_reg);
14291 %}
14292
14293 // Xor Memory with Immediate
14294 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14295 %{
14296 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14297 effect(KILL cr);
14298 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14299
14300 ins_cost(125);
14301 format %{ "xorq $dst, $src\t# long" %}
14302 ins_encode %{
14303 __ xorq($dst$$Address, $src$$constant);
14304 %}
14305 ins_pipe(ialu_mem_imm);
14306 %}
14307
14308 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14309 %{
14310 match(Set dst (CmpLTMask p q));
14311 effect(KILL cr);
14312
14313 ins_cost(400);
14314 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14315 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14316 "negl $dst" %}
14317 ins_encode %{
14318 __ cmpl($p$$Register, $q$$Register);
14319 __ setcc(Assembler::less, $dst$$Register);
14320 __ negl($dst$$Register);
14321 %}
14322 ins_pipe(pipe_slow);
14323 %}
14324
14325 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14326 %{
14327 match(Set dst (CmpLTMask dst zero));
14328 effect(KILL cr);
14329
14330 ins_cost(100);
14331 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14332 ins_encode %{
14333 __ sarl($dst$$Register, 31);
14334 %}
14335 ins_pipe(ialu_reg);
14336 %}
14337
14338 /* Better to save a register than avoid a branch */
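// Computes p = (p - q) + ((p < q) ? y : 0): the subl both produces p - q and
// sets the flags for the conditional add, so no mask is materialized.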
14339 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14340 %{
14341 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14342 effect(KILL cr);
14343 ins_cost(300);
14344 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14345 "jge done\n\t"
14346 "addl $p,$y\n"
14347 "done: " %}
14348 ins_encode %{
14349 Register Rp = $p$$Register;
14350 Register Rq = $q$$Register;
14351 Register Ry = $y$$Register;
14352 Label done;
14353 __ subl(Rp, Rq);
14354 __ jccb(Assembler::greaterEqual, done);
14355 __ addl(Rp, Ry);
14356 __ bind(done);
14357 %}
14358 ins_pipe(pipe_cmplt);
14359 %}
14360
14361 /* Better to save a register than avoid a branch */
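// Computes y = (p < q) ? y : 0 with a compare and a short branch instead of
// materializing the CmpLTMask in a register.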
14362 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14363 %{
14364 match(Set y (AndI (CmpLTMask p q) y));
14365 effect(KILL cr);
14366
14367 ins_cost(300);
14368
14369 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14370 "jlt done\n\t"
14371 "xorl $y, $y\n"
14372 "done: " %}
14373 ins_encode %{
14374 Register Rp = $p$$Register;
14375 Register Rq = $q$$Register;
14376 Register Ry = $y$$Register;
14377 Label done;
14378 __ cmpl(Rp, Rq);
14379 __ jccb(Assembler::less, done);
14380 __ xorl(Ry, Ry);
14381 __ bind(done);
14382 %}
14383 ins_pipe(pipe_cmplt);
14384 %}
14385
14386
14387 //---------- FP Instructions------------------------------------------------
14388
14389 // Really expensive, avoid
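// ucomiss/ucomisd set ZF, PF and CF on unordered inputs. The fixup below is
// taken only when PF (unordered) is set: it masks the saved flags so that ZF
// and PF are cleared while CF stays set, making a NaN operand read as "below".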
14390 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14391 %{
14392 match(Set cr (CmpF src1 src2));
14393
14394 ins_cost(500);
14395 format %{ "ucomiss $src1, $src2\n\t"
14396 "jnp,s exit\n\t"
14397 "pushfq\t# saw NaN, set CF\n\t"
14398 "andq [rsp], #0xffffff2b\n\t"
14399 "popfq\n"
14400 "exit:" %}
14401 ins_encode %{
14402 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14403 emit_cmpfp_fixup(masm);
14404 %}
14405 ins_pipe(pipe_slow);
14406 %}
14407
14408 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14409 match(Set cr (CmpF src1 src2));
14410
14411 ins_cost(100);
14412 format %{ "ucomiss $src1, $src2" %}
14413 ins_encode %{
14414 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14415 %}
14416 ins_pipe(pipe_slow);
14417 %}
14418
14419 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14420 match(Set cr (CmpF src1 (LoadF src2)));
14421
14422 ins_cost(100);
14423 format %{ "ucomiss $src1, $src2" %}
14424 ins_encode %{
14425 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14426 %}
14427 ins_pipe(pipe_slow);
14428 %}
14429
14430 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14431 match(Set cr (CmpF src con));
14432 ins_cost(100);
14433 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14434 ins_encode %{
14435 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14436 %}
14437 ins_pipe(pipe_slow);
14438 %}
14439
14440 // Really expensive, avoid
14441 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14442 %{
14443 match(Set cr (CmpD src1 src2));
14444
14445 ins_cost(500);
14446 format %{ "ucomisd $src1, $src2\n\t"
14447 "jnp,s exit\n\t"
14448 "pushfq\t# saw NaN, set CF\n\t"
14449 "andq [rsp], #0xffffff2b\n\t"
14450 "popfq\n"
14451 "exit:" %}
14452 ins_encode %{
14453 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14454 emit_cmpfp_fixup(masm);
14455 %}
14456 ins_pipe(pipe_slow);
14457 %}
14458
14459 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14460 match(Set cr (CmpD src1 src2));
14461
14462 ins_cost(100);
14463 format %{ "ucomisd $src1, $src2 test" %}
14464 ins_encode %{
14465 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14466 %}
14467 ins_pipe(pipe_slow);
14468 %}
14469
14470 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14471 match(Set cr (CmpD src1 (LoadD src2)));
14472
14473 ins_cost(100);
14474 format %{ "ucomisd $src1, $src2" %}
14475 ins_encode %{
14476 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14477 %}
14478 ins_pipe(pipe_slow);
14479 %}
14480
14481 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14482 match(Set cr (CmpD src con));
14483 ins_cost(100);
14484 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14485 ins_encode %{
14486 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14487 %}
14488 ins_pipe(pipe_slow);
14489 %}
14490
14491 // Compare into -1,0,1
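// dst = -1 if src1 < src2 or either input is NaN, 0 if equal, +1 if greater;
// the jp path routes unordered results to the preloaded -1 (fcmpl semantics).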
14492 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14493 %{
14494 match(Set dst (CmpF3 src1 src2));
14495 effect(KILL cr);
14496
14497 ins_cost(275);
14498 format %{ "ucomiss $src1, $src2\n\t"
14499 "movl $dst, #-1\n\t"
14500 "jp,s done\n\t"
14501 "jb,s done\n\t"
14502 "setne $dst\n\t"
14503 "movzbl $dst, $dst\n"
14504 "done:" %}
14505 ins_encode %{
14506 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14507 emit_cmpfp3(masm, $dst$$Register);
14508 %}
14509 ins_pipe(pipe_slow);
14510 %}
14511
14512 // Compare into -1,0,1
14513 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14514 %{
14515 match(Set dst (CmpF3 src1 (LoadF src2)));
14516 effect(KILL cr);
14517
14518 ins_cost(275);
14519 format %{ "ucomiss $src1, $src2\n\t"
14520 "movl $dst, #-1\n\t"
14521 "jp,s done\n\t"
14522 "jb,s done\n\t"
14523 "setne $dst\n\t"
14524 "movzbl $dst, $dst\n"
14525 "done:" %}
14526 ins_encode %{
14527 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14528 emit_cmpfp3(masm, $dst$$Register);
14529 %}
14530 ins_pipe(pipe_slow);
14531 %}
14532
14533 // Compare into -1,0,1
14534 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14535 match(Set dst (CmpF3 src con));
14536 effect(KILL cr);
14537
14538 ins_cost(275);
14539 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14540 "movl $dst, #-1\n\t"
14541 "jp,s done\n\t"
14542 "jb,s done\n\t"
14543 "setne $dst\n\t"
14544 "movzbl $dst, $dst\n"
14545 "done:" %}
14546 ins_encode %{
14547 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14548 emit_cmpfp3(masm, $dst$$Register);
14549 %}
14550 ins_pipe(pipe_slow);
14551 %}
14552
14553 // Compare into -1,0,1
14554 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14555 %{
14556 match(Set dst (CmpD3 src1 src2));
14557 effect(KILL cr);
14558
14559 ins_cost(275);
14560 format %{ "ucomisd $src1, $src2\n\t"
14561 "movl $dst, #-1\n\t"
14562 "jp,s done\n\t"
14563 "jb,s done\n\t"
14564 "setne $dst\n\t"
14565 "movzbl $dst, $dst\n"
14566 "done:" %}
14567 ins_encode %{
14568 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14569 emit_cmpfp3(masm, $dst$$Register);
14570 %}
14571 ins_pipe(pipe_slow);
14572 %}
14573
14574 // Compare into -1,0,1
14575 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14576 %{
14577 match(Set dst (CmpD3 src1 (LoadD src2)));
14578 effect(KILL cr);
14579
14580 ins_cost(275);
14581 format %{ "ucomisd $src1, $src2\n\t"
14582 "movl $dst, #-1\n\t"
14583 "jp,s done\n\t"
14584 "jb,s done\n\t"
14585 "setne $dst\n\t"
14586 "movzbl $dst, $dst\n"
14587 "done:" %}
14588 ins_encode %{
14589 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14590 emit_cmpfp3(masm, $dst$$Register);
14591 %}
14592 ins_pipe(pipe_slow);
14593 %}
14594
14595 // Compare into -1,0,1
14596 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14597 match(Set dst (CmpD3 src con));
14598 effect(KILL cr);
14599
14600 ins_cost(275);
14601 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14602 "movl $dst, #-1\n\t"
14603 "jp,s done\n\t"
14604 "jb,s done\n\t"
14605 "setne $dst\n\t"
14606 "movzbl $dst, $dst\n"
14607 "done:" %}
14608 ins_encode %{
14609 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14610 emit_cmpfp3(masm, $dst$$Register);
14611 %}
14612 ins_pipe(pipe_slow);
14613 %}
14614
14615 //----------Arithmetic Conversion Instructions---------------------------------
14616
14617 instruct convF2D_reg_reg(regD dst, regF src)
14618 %{
14619 match(Set dst (ConvF2D src));
14620
14621 format %{ "cvtss2sd $dst, $src" %}
14622 ins_encode %{
14623 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14624 %}
14625 ins_pipe(pipe_slow); // XXX
14626 %}
14627
14628 instruct convF2D_reg_mem(regD dst, memory src)
14629 %{
14630 predicate(UseAVX == 0);
14631 match(Set dst (ConvF2D (LoadF src)));
14632
14633 format %{ "cvtss2sd $dst, $src" %}
14634 ins_encode %{
14635 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14636 %}
14637 ins_pipe(pipe_slow); // XXX
14638 %}
14639
14640 instruct convD2F_reg_reg(regF dst, regD src)
14641 %{
14642 match(Set dst (ConvD2F src));
14643
14644 format %{ "cvtsd2ss $dst, $src" %}
14645 ins_encode %{
14646 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14647 %}
14648 ins_pipe(pipe_slow); // XXX
14649 %}
14650
14651 instruct convD2F_reg_mem(regF dst, memory src)
14652 %{
14653 predicate(UseAVX == 0);
14654 match(Set dst (ConvD2F (LoadD src)));
14655
14656 format %{ "cvtsd2ss $dst, $src" %}
14657 ins_encode %{
14658 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14659 %}
14660 ins_pipe(pipe_slow); // XXX
14661 %}
14662
14663 // XXX do mem variants
14664 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14665 %{
14666 predicate(!VM_Version::supports_avx10_2());
14667 match(Set dst (ConvF2I src));
14668 effect(KILL cr);
14669 format %{ "convert_f2i $dst, $src" %}
14670 ins_encode %{
14671 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14672 %}
14673 ins_pipe(pipe_slow);
14674 %}
14675
14676 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14677 %{
14678 predicate(VM_Version::supports_avx10_2());
14679 match(Set dst (ConvF2I src));
14680 format %{ "evcvttss2sisl $dst, $src" %}
14681 ins_encode %{
14682 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14683 %}
14684 ins_pipe(pipe_slow);
14685 %}
14686
14687 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14688 %{
14689 predicate(VM_Version::supports_avx10_2());
14690 match(Set dst (ConvF2I (LoadF src)));
14691 format %{ "evcvttss2sisl $dst, $src" %}
14692 ins_encode %{
14693 __ evcvttss2sisl($dst$$Register, $src$$Address);
14694 %}
14695 ins_pipe(pipe_slow);
14696 %}
14697
14698 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14699 %{
14700 predicate(!VM_Version::supports_avx10_2());
14701 match(Set dst (ConvF2L src));
14702 effect(KILL cr);
14703 format %{ "convert_f2l $dst, $src"%}
14704 ins_encode %{
14705 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14706 %}
14707 ins_pipe(pipe_slow);
14708 %}
14709
14710 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14711 %{
14712 predicate(VM_Version::supports_avx10_2());
14713 match(Set dst (ConvF2L src));
14714 format %{ "evcvttss2sisq $dst, $src" %}
14715 ins_encode %{
14716 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14717 %}
14718 ins_pipe(pipe_slow);
14719 %}
14720
14721 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14722 %{
14723 predicate(VM_Version::supports_avx10_2());
14724 match(Set dst (ConvF2L (LoadF src)));
14725 format %{ "evcvttss2sisq $dst, $src" %}
14726 ins_encode %{
14727 __ evcvttss2sisq($dst$$Register, $src$$Address);
14728 %}
14729 ins_pipe(pipe_slow);
14730 %}
14731
14732 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14733 %{
14734 predicate(!VM_Version::supports_avx10_2());
14735 match(Set dst (ConvD2I src));
14736 effect(KILL cr);
14737 format %{ "convert_d2i $dst, $src"%}
14738 ins_encode %{
14739 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14740 %}
14741 ins_pipe(pipe_slow);
14742 %}
14743
14744 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14745 %{
14746 predicate(VM_Version::supports_avx10_2());
14747 match(Set dst (ConvD2I src));
14748 format %{ "evcvttsd2sisl $dst, $src" %}
14749 ins_encode %{
14750 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14751 %}
14752 ins_pipe(pipe_slow);
14753 %}
14754
14755 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14756 %{
14757 predicate(VM_Version::supports_avx10_2());
14758 match(Set dst (ConvD2I (LoadD src)));
14759 format %{ "evcvttsd2sisl $dst, $src" %}
14760 ins_encode %{
14761 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14762 %}
14763 ins_pipe(pipe_slow);
14764 %}
14765
14766 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14767 %{
14768 predicate(!VM_Version::supports_avx10_2());
14769 match(Set dst (ConvD2L src));
14770 effect(KILL cr);
14771 format %{ "convert_d2l $dst, $src"%}
14772 ins_encode %{
14773 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14774 %}
14775 ins_pipe(pipe_slow);
14776 %}
14777
14778 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14779 %{
14780 predicate(VM_Version::supports_avx10_2());
14781 match(Set dst (ConvD2L src));
14782 format %{ "evcvttsd2sisq $dst, $src" %}
14783 ins_encode %{
14784 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14785 %}
14786 ins_pipe(pipe_slow);
14787 %}
14788
14789 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14790 %{
14791 predicate(VM_Version::supports_avx10_2());
14792 match(Set dst (ConvD2L (LoadD src)));
14793 format %{ "evcvttsd2sisq $dst, $src" %}
14794 ins_encode %{
14795 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14796 %}
14797 ins_pipe(pipe_slow);
14798 %}
14799
14800 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14801 %{
14802 match(Set dst (RoundD src));
14803 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14804 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14805 ins_encode %{
14806 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14807 %}
14808 ins_pipe(pipe_slow);
14809 %}
14810
14811 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14812 %{
14813 match(Set dst (RoundF src));
14814 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14815 format %{ "round_float $dst,$src" %}
14816 ins_encode %{
14817 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14818 %}
14819 ins_pipe(pipe_slow);
14820 %}
14821
14822 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14823 %{
14824 predicate(!UseXmmI2F);
14825 match(Set dst (ConvI2F src));
14826
14827 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14828 ins_encode %{
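    // cvtsi2ss merges into the low 32 bits and leaves the rest of the XMM
    // register untouched, creating a false dependency on its previous
    // contents; zeroing it first breaks that dependency. The same idea is
    // used in the i2d/l2f/l2d variants below.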
14829 if (UseAVX > 0) {
14830 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14831 }
14832 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14833 %}
14834 ins_pipe(pipe_slow); // XXX
14835 %}
14836
14837 instruct convI2F_reg_mem(regF dst, memory src)
14838 %{
14839 predicate(UseAVX == 0);
14840 match(Set dst (ConvI2F (LoadI src)));
14841
14842 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14843 ins_encode %{
14844 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14845 %}
14846 ins_pipe(pipe_slow); // XXX
14847 %}
14848
14849 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14850 %{
14851 predicate(!UseXmmI2D);
14852 match(Set dst (ConvI2D src));
14853
14854 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14855 ins_encode %{
14856 if (UseAVX > 0) {
14857 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14858 }
14859 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14860 %}
14861 ins_pipe(pipe_slow); // XXX
14862 %}
14863
14864 instruct convI2D_reg_mem(regD dst, memory src)
14865 %{
14866 predicate(UseAVX == 0);
14867 match(Set dst (ConvI2D (LoadI src)));
14868
14869 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14870 ins_encode %{
14871 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14872 %}
14873 ins_pipe(pipe_slow); // XXX
14874 %}
14875
14876 instruct convXI2F_reg(regF dst, rRegI src)
14877 %{
14878 predicate(UseXmmI2F);
14879 match(Set dst (ConvI2F src));
14880
14881 format %{ "movdl $dst, $src\n\t"
14882 "cvtdq2psl $dst, $dst\t# i2f" %}
14883 ins_encode %{
14884 __ movdl($dst$$XMMRegister, $src$$Register);
14885 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14886 %}
14887 ins_pipe(pipe_slow); // XXX
14888 %}
14889
14890 instruct convXI2D_reg(regD dst, rRegI src)
14891 %{
14892 predicate(UseXmmI2D);
14893 match(Set dst (ConvI2D src));
14894
14895 format %{ "movdl $dst, $src\n\t"
14896 "cvtdq2pdl $dst, $dst\t# i2d" %}
14897 ins_encode %{
14898 __ movdl($dst$$XMMRegister, $src$$Register);
14899 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14900 %}
14901 ins_pipe(pipe_slow); // XXX
14902 %}
14903
14904 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14905 %{
14906 match(Set dst (ConvL2F src));
14907
14908 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14909 ins_encode %{
14910 if (UseAVX > 0) {
14911 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14912 }
14913 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14914 %}
14915 ins_pipe(pipe_slow); // XXX
14916 %}
14917
14918 instruct convL2F_reg_mem(regF dst, memory src)
14919 %{
14920 predicate(UseAVX == 0);
14921 match(Set dst (ConvL2F (LoadL src)));
14922
14923 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14924 ins_encode %{
14925 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14926 %}
14927 ins_pipe(pipe_slow); // XXX
14928 %}
14929
14930 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14931 %{
14932 match(Set dst (ConvL2D src));
14933
14934 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14935 ins_encode %{
14936 if (UseAVX > 0) {
14937 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14938 }
14939 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14940 %}
14941 ins_pipe(pipe_slow); // XXX
14942 %}
14943
14944 instruct convL2D_reg_mem(regD dst, memory src)
14945 %{
14946 predicate(UseAVX == 0);
14947 match(Set dst (ConvL2D (LoadL src)));
14948
14949 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14950 ins_encode %{
14951 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14952 %}
14953 ins_pipe(pipe_slow); // XXX
14954 %}
14955
14956 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14957 %{
14958 match(Set dst (ConvI2L src));
14959
14960 ins_cost(125);
14961 format %{ "movslq $dst, $src\t# i2l" %}
14962 ins_encode %{
14963 __ movslq($dst$$Register, $src$$Register);
14964 %}
14965 ins_pipe(ialu_reg_reg);
14966 %}
14967
14968 // Zero-extend convert int to long
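// A 32-bit movl implicitly zeroes bits 63..32 on x86-64, so the AndL with the
// 0xFFFFFFFF mask folds into the move; when dst and src are allocated to the
// same register no instruction is emitted at all.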
14969 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14970 %{
14971 match(Set dst (AndL (ConvI2L src) mask));
14972
14973 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14974 ins_encode %{
14975 if ($dst$$reg != $src$$reg) {
14976 __ movl($dst$$Register, $src$$Register);
14977 }
14978 %}
14979 ins_pipe(ialu_reg_reg);
14980 %}
14981
14982 // Zero-extend convert int to long
14983 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14984 %{
14985 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14986
14987 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14988 ins_encode %{
14989 __ movl($dst$$Register, $src$$Address);
14990 %}
14991 ins_pipe(ialu_reg_mem);
14992 %}
14993
14994 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14995 %{
14996 match(Set dst (AndL src mask));
14997
14998 format %{ "movl $dst, $src\t# zero-extend long" %}
14999 ins_encode %{
15000 __ movl($dst$$Register, $src$$Register);
15001 %}
15002 ins_pipe(ialu_reg_reg);
15003 %}
15004
15005 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15006 %{
15007 match(Set dst (ConvL2I src));
15008
15009 format %{ "movl $dst, $src\t# l2i" %}
15010 ins_encode %{
15011 __ movl($dst$$Register, $src$$Register);
15012 %}
15013 ins_pipe(ialu_reg_reg);
15014 %}
15015
15016
15017 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15018 match(Set dst (MoveF2I src));
15019 effect(DEF dst, USE src);
15020
15021 ins_cost(125);
15022 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15023 ins_encode %{
15024 __ movl($dst$$Register, Address(rsp, $src$$disp));
15025 %}
15026 ins_pipe(ialu_reg_mem);
15027 %}
15028
15029 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15030 match(Set dst (MoveI2F src));
15031 effect(DEF dst, USE src);
15032
15033 ins_cost(125);
15034 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15035 ins_encode %{
15036 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15037 %}
15038 ins_pipe(pipe_slow);
15039 %}
15040
15041 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15042 match(Set dst (MoveD2L src));
15043 effect(DEF dst, USE src);
15044
15045 ins_cost(125);
15046 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15047 ins_encode %{
15048 __ movq($dst$$Register, Address(rsp, $src$$disp));
15049 %}
15050 ins_pipe(ialu_reg_mem);
15051 %}
15052
15053 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15054 predicate(!UseXmmLoadAndClearUpper);
15055 match(Set dst (MoveL2D src));
15056 effect(DEF dst, USE src);
15057
15058 ins_cost(125);
15059 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15060 ins_encode %{
15061 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15062 %}
15063 ins_pipe(pipe_slow);
15064 %}
15065
15066 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15067 predicate(UseXmmLoadAndClearUpper);
15068 match(Set dst (MoveL2D src));
15069 effect(DEF dst, USE src);
15070
15071 ins_cost(125);
15072 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15073 ins_encode %{
15074 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15075 %}
15076 ins_pipe(pipe_slow);
15077 %}
15078
15079
15080 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15081 match(Set dst (MoveF2I src));
15082 effect(DEF dst, USE src);
15083
15084 ins_cost(95); // XXX
15085 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15086 ins_encode %{
15087 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15088 %}
15089 ins_pipe(pipe_slow);
15090 %}
15091
15092 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15093 match(Set dst (MoveI2F src));
15094 effect(DEF dst, USE src);
15095
15096 ins_cost(100);
15097 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15098 ins_encode %{
15099 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15100 %}
15101 ins_pipe( ialu_mem_reg );
15102 %}
15103
15104 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15105 match(Set dst (MoveD2L src));
15106 effect(DEF dst, USE src);
15107
15108 ins_cost(95); // XXX
15109 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
15110 ins_encode %{
15111 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15112 %}
15113 ins_pipe(pipe_slow);
15114 %}
15115
15116 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15117 match(Set dst (MoveL2D src));
15118 effect(DEF dst, USE src);
15119
15120 ins_cost(100);
15121 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15122 ins_encode %{
15123 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15124 %}
15125 ins_pipe(ialu_mem_reg);
15126 %}
15127
15128 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15129 match(Set dst (MoveF2I src));
15130 effect(DEF dst, USE src);
15131 ins_cost(85);
15132 format %{ "movd $dst,$src\t# MoveF2I" %}
15133 ins_encode %{
15134 __ movdl($dst$$Register, $src$$XMMRegister);
15135 %}
15136 ins_pipe( pipe_slow );
15137 %}
15138
15139 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15140 match(Set dst (MoveD2L src));
15141 effect(DEF dst, USE src);
15142 ins_cost(85);
15143 format %{ "movd $dst,$src\t# MoveD2L" %}
15144 ins_encode %{
15145 __ movdq($dst$$Register, $src$$XMMRegister);
15146 %}
15147 ins_pipe( pipe_slow );
15148 %}
15149
15150 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15151 match(Set dst (MoveI2F src));
15152 effect(DEF dst, USE src);
15153 ins_cost(100);
15154 format %{ "movd $dst,$src\t# MoveI2F" %}
15155 ins_encode %{
15156 __ movdl($dst$$XMMRegister, $src$$Register);
15157 %}
15158 ins_pipe( pipe_slow );
15159 %}
15160
15161 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15162 match(Set dst (MoveL2D src));
15163 effect(DEF dst, USE src);
15164 ins_cost(100);
15165 format %{ "movd $dst,$src\t# MoveL2D" %}
15166 ins_encode %{
15167 __ movdq($dst$$XMMRegister, $src$$Register);
15168 %}
15169 ins_pipe( pipe_slow );
15170 %}
15171
15172
15173 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
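// cnt arrives in rcx as a count of 8-byte words, base in rdi, and the fill
// value in rax. The two trailing booleans passed to clear_mem select the
// large-array and word-copy-only code paths, mirroring the predicates of the
// ClearArray variants below.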
15175 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15176 Universe dummy, rFlagsReg cr)
15177 %{
15178 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15179 match(Set dummy (ClearArray (Binary cnt base) val));
15180 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15181
15182 format %{ $$template
15183 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15184 $$emit$$"jg LARGE\n\t"
15185 $$emit$$"dec rcx\n\t"
15186 $$emit$$"js DONE\t# Zero length\n\t"
15187 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15188 $$emit$$"dec rcx\n\t"
15189 $$emit$$"jge LOOP\n\t"
15190 $$emit$$"jmp DONE\n\t"
15191 $$emit$$"# LARGE:\n\t"
15192 if (UseFastStosb) {
15193 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15194 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15195 } else if (UseXMMForObjInit) {
15196 $$emit$$"movdq $tmp, $val\n\t"
15197 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15198 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15199 $$emit$$"jmpq L_zero_64_bytes\n\t"
15200 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15201 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15202 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15203 $$emit$$"add 0x40,rax\n\t"
15204 $$emit$$"# L_zero_64_bytes:\n\t"
15205 $$emit$$"sub 0x8,rcx\n\t"
15206 $$emit$$"jge L_loop\n\t"
15207 $$emit$$"add 0x4,rcx\n\t"
15208 $$emit$$"jl L_tail\n\t"
15209 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15210 $$emit$$"add 0x20,rax\n\t"
15211 $$emit$$"sub 0x4,rcx\n\t"
15212 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15213 $$emit$$"add 0x4,rcx\n\t"
15214 $$emit$$"jle L_end\n\t"
15215 $$emit$$"dec rcx\n\t"
15216 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15217 $$emit$$"vmovq xmm0,(rax)\n\t"
15218 $$emit$$"add 0x8,rax\n\t"
15219 $$emit$$"dec rcx\n\t"
15220 $$emit$$"jge L_sloop\n\t"
15221 $$emit$$"# L_end:\n\t"
15222 } else {
15223 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15224 }
15225 $$emit$$"# DONE"
15226 %}
15227 ins_encode %{
15228 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15229 $tmp$$XMMRegister, false, false);
15230 %}
15231 ins_pipe(pipe_slow);
15232 %}
15233
15234 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15235 Universe dummy, rFlagsReg cr)
15236 %{
15237 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15238 match(Set dummy (ClearArray (Binary cnt base) val));
15239 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15240
15241 format %{ $$template
15242 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15243 $$emit$$"jg LARGE\n\t"
15244 $$emit$$"dec rcx\n\t"
15245 $$emit$$"js DONE\t# Zero length\n\t"
15246 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15247 $$emit$$"dec rcx\n\t"
15248 $$emit$$"jge LOOP\n\t"
15249 $$emit$$"jmp DONE\n\t"
15250 $$emit$$"# LARGE:\n\t"
15251 if (UseXMMForObjInit) {
15252 $$emit$$"movdq $tmp, $val\n\t"
15253 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15254 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15255 $$emit$$"jmpq L_zero_64_bytes\n\t"
15256 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15257 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15258 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15259 $$emit$$"add 0x40,rax\n\t"
15260 $$emit$$"# L_zero_64_bytes:\n\t"
15261 $$emit$$"sub 0x8,rcx\n\t"
15262 $$emit$$"jge L_loop\n\t"
15263 $$emit$$"add 0x4,rcx\n\t"
15264 $$emit$$"jl L_tail\n\t"
15265 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15266 $$emit$$"add 0x20,rax\n\t"
15267 $$emit$$"sub 0x4,rcx\n\t"
15268 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15269 $$emit$$"add 0x4,rcx\n\t"
15270 $$emit$$"jle L_end\n\t"
15271 $$emit$$"dec rcx\n\t"
15272 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15273 $$emit$$"vmovq xmm0,(rax)\n\t"
15274 $$emit$$"add 0x8,rax\n\t"
15275 $$emit$$"dec rcx\n\t"
15276 $$emit$$"jge L_sloop\n\t"
15277 $$emit$$"# L_end:\n\t"
15278 } else {
15279 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15280 }
15281 $$emit$$"# DONE"
15282 %}
15283 ins_encode %{
15284 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15285 $tmp$$XMMRegister, false, true);
15286 %}
15287 ins_pipe(pipe_slow);
15288 %}
15289
15290 // Small non-constant length ClearArray for AVX512 targets.
15291 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15292 Universe dummy, rFlagsReg cr)
15293 %{
15294 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15295 match(Set dummy (ClearArray (Binary cnt base) val));
15296 ins_cost(125);
15297 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15298
15299 format %{ $$template
15300 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15301 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15302 $$emit$$"jg LARGE\n\t"
15303 $$emit$$"dec rcx\n\t"
15304 $$emit$$"js DONE\t# Zero length\n\t"
15305 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15306 $$emit$$"dec rcx\n\t"
15307 $$emit$$"jge LOOP\n\t"
15308 $$emit$$"jmp DONE\n\t"
15309 $$emit$$"# LARGE:\n\t"
15310 if (UseFastStosb) {
15311 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15312 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15313 } else if (UseXMMForObjInit) {
15314 $$emit$$"mov rdi,rax\n\t"
15315 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15316 $$emit$$"jmpq L_zero_64_bytes\n\t"
15317 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15318 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15319 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15320 $$emit$$"add 0x40,rax\n\t"
15321 $$emit$$"# L_zero_64_bytes:\n\t"
15322 $$emit$$"sub 0x8,rcx\n\t"
15323 $$emit$$"jge L_loop\n\t"
15324 $$emit$$"add 0x4,rcx\n\t"
15325 $$emit$$"jl L_tail\n\t"
15326 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15327 $$emit$$"add 0x20,rax\n\t"
15328 $$emit$$"sub 0x4,rcx\n\t"
15329 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15330 $$emit$$"add 0x4,rcx\n\t"
15331 $$emit$$"jle L_end\n\t"
15332 $$emit$$"dec rcx\n\t"
15333 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15334 $$emit$$"vmovq xmm0,(rax)\n\t"
15335 $$emit$$"add 0x8,rax\n\t"
15336 $$emit$$"dec rcx\n\t"
15337 $$emit$$"jge L_sloop\n\t"
15338 $$emit$$"# L_end:\n\t"
15339 } else {
15340 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15341 }
15342 $$emit$$"# DONE"
15343 %}
15344 ins_encode %{
15345 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15346 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15347 %}
15348 ins_pipe(pipe_slow);
15349 %}
15350
15351 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15352 Universe dummy, rFlagsReg cr)
15353 %{
15354 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15355 match(Set dummy (ClearArray (Binary cnt base) val));
15356 ins_cost(125);
15357 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15358
15359 format %{ $$template
15360 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15361 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15362 $$emit$$"jg LARGE\n\t"
15363 $$emit$$"dec rcx\n\t"
15364 $$emit$$"js DONE\t# Zero length\n\t"
15365 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15366 $$emit$$"dec rcx\n\t"
15367 $$emit$$"jge LOOP\n\t"
15368 $$emit$$"jmp DONE\n\t"
15369 $$emit$$"# LARGE:\n\t"
15370 if (UseFastStosb) {
15371 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15372 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15373 } else if (UseXMMForObjInit) {
15374 $$emit$$"mov rdi,rax\n\t"
15375 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15376 $$emit$$"jmpq L_zero_64_bytes\n\t"
15377 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15378 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15379 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15380 $$emit$$"add 0x40,rax\n\t"
15381 $$emit$$"# L_zero_64_bytes:\n\t"
15382 $$emit$$"sub 0x8,rcx\n\t"
15383 $$emit$$"jge L_loop\n\t"
15384 $$emit$$"add 0x4,rcx\n\t"
15385 $$emit$$"jl L_tail\n\t"
15386 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15387 $$emit$$"add 0x20,rax\n\t"
15388 $$emit$$"sub 0x4,rcx\n\t"
15389 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15390 $$emit$$"add 0x4,rcx\n\t"
15391 $$emit$$"jle L_end\n\t"
15392 $$emit$$"dec rcx\n\t"
15393 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15394 $$emit$$"vmovq xmm0,(rax)\n\t"
15395 $$emit$$"add 0x8,rax\n\t"
15396 $$emit$$"dec rcx\n\t"
15397 $$emit$$"jge L_sloop\n\t"
15398 $$emit$$"# L_end:\n\t"
15399 } else {
15400 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15401 }
15402 $$emit$$"# DONE"
15403 %}
15404 ins_encode %{
15405 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15406 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15407 %}
15408 ins_pipe(pipe_slow);
15409 %}
15410
15411 // Large non-constant length ClearArray for non-AVX512 targets.
15412 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15413 Universe dummy, rFlagsReg cr)
15414 %{
15415 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15416 match(Set dummy (ClearArray (Binary cnt base) val));
15417 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15418
15419 format %{ $$template
15420 if (UseFastStosb) {
15421 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15422 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15423 } else if (UseXMMForObjInit) {
15424 $$emit$$"movdq $tmp, $val\n\t"
15425 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15426 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15427 $$emit$$"jmpq L_zero_64_bytes\n\t"
15428 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15429 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15430 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15431 $$emit$$"add 0x40,rax\n\t"
15432 $$emit$$"# L_zero_64_bytes:\n\t"
15433 $$emit$$"sub 0x8,rcx\n\t"
15434 $$emit$$"jge L_loop\n\t"
15435 $$emit$$"add 0x4,rcx\n\t"
15436 $$emit$$"jl L_tail\n\t"
15437 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15438 $$emit$$"add 0x20,rax\n\t"
15439 $$emit$$"sub 0x4,rcx\n\t"
15440 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15441 $$emit$$"add 0x4,rcx\n\t"
15442 $$emit$$"jle L_end\n\t"
15443 $$emit$$"dec rcx\n\t"
15444 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15445 $$emit$$"vmovq xmm0,(rax)\n\t"
15446 $$emit$$"add 0x8,rax\n\t"
15447 $$emit$$"dec rcx\n\t"
15448 $$emit$$"jge L_sloop\n\t"
15449 $$emit$$"# L_end:\n\t"
15450 } else {
15451 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15452 }
15453 %}
15454 ins_encode %{
15455 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15456 $tmp$$XMMRegister, true, false);
15457 %}
15458 ins_pipe(pipe_slow);
15459 %}
15460
15461 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15462 Universe dummy, rFlagsReg cr)
15463 %{
15464 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15465 match(Set dummy (ClearArray (Binary cnt base) val));
15466 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15467
15468 format %{ $$template
15469 if (UseXMMForObjInit) {
15470 $$emit$$"movdq $tmp, $val\n\t"
15471 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15472 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15473 $$emit$$"jmpq L_zero_64_bytes\n\t"
15474 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15475 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15476 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15477 $$emit$$"add 0x40,rax\n\t"
15478 $$emit$$"# L_zero_64_bytes:\n\t"
15479 $$emit$$"sub 0x8,rcx\n\t"
15480 $$emit$$"jge L_loop\n\t"
15481 $$emit$$"add 0x4,rcx\n\t"
15482 $$emit$$"jl L_tail\n\t"
15483 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15484 $$emit$$"add 0x20,rax\n\t"
15485 $$emit$$"sub 0x4,rcx\n\t"
15486 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15487 $$emit$$"add 0x4,rcx\n\t"
15488 $$emit$$"jle L_end\n\t"
15489 $$emit$$"dec rcx\n\t"
15490 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15491 $$emit$$"vmovq xmm0,(rax)\n\t"
15492 $$emit$$"add 0x8,rax\n\t"
15493 $$emit$$"dec rcx\n\t"
15494 $$emit$$"jge L_sloop\n\t"
15495 $$emit$$"# L_end:\n\t"
15496 } else {
15497 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15498 }
15499 %}
15500 ins_encode %{
15501 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15502 $tmp$$XMMRegister, true, true);
15503 %}
15504 ins_pipe(pipe_slow);
15505 %}
15506
15507 // Large non-constant length ClearArray for AVX512 targets.
15508 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15509 Universe dummy, rFlagsReg cr)
15510 %{
15511 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15512 match(Set dummy (ClearArray (Binary cnt base) val));
15513 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15514
15515 format %{ $$template
if (UseFastStosb) {
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
} else if (UseXMMForObjInit) {
$$emit$$"movdq $tmp, $val\n\t"
$$emit$$"punpcklqdq $tmp, $tmp\n\t"
$$emit$$"vinserti128_high $tmp, $tmp\n\t"
$$emit$$"jmpq L_zero_64_bytes\n\t"
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
$$emit$$"vmovdqu $tmp,(rax)\n\t"
$$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
$$emit$$"add 0x40,rax\n\t"
$$emit$$"# L_zero_64_bytes:\n\t"
$$emit$$"sub 0x8,rcx\n\t"
$$emit$$"jge L_loop\n\t"
$$emit$$"add 0x4,rcx\n\t"
$$emit$$"jl L_tail\n\t"
$$emit$$"vmovdqu $tmp,(rax)\n\t"
$$emit$$"add 0x20,rax\n\t"
$$emit$$"sub 0x4,rcx\n\t"
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
$$emit$$"add 0x4,rcx\n\t"
$$emit$$"jle L_end\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
$$emit$$"vmovq xmm0,(rax)\n\t"
$$emit$$"add 0x8,rax\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"jge L_sloop\n\t"
$$emit$$"# L_end:\n\t"
} else {
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
}
15550 %}
15551 ins_encode %{
15552 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15553 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15554 %}
15555 ins_pipe(pipe_slow);
15556 %}
15557
15558 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15559 Universe dummy, rFlagsReg cr)
15560 %{
15561 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15562 match(Set dummy (ClearArray (Binary cnt base) val));
15563 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15564
15565 format %{ $$template
if (UseXMMForObjInit) {
$$emit$$"movdq $tmp, $val\n\t"
$$emit$$"punpcklqdq $tmp, $tmp\n\t"
$$emit$$"vinserti128_high $tmp, $tmp\n\t"
$$emit$$"jmpq L_zero_64_bytes\n\t"
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
$$emit$$"vmovdqu $tmp,(rax)\n\t"
$$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
$$emit$$"add 0x40,rax\n\t"
$$emit$$"# L_zero_64_bytes:\n\t"
$$emit$$"sub 0x8,rcx\n\t"
$$emit$$"jge L_loop\n\t"
$$emit$$"add 0x4,rcx\n\t"
$$emit$$"jl L_tail\n\t"
$$emit$$"vmovdqu $tmp,(rax)\n\t"
$$emit$$"add 0x20,rax\n\t"
$$emit$$"sub 0x4,rcx\n\t"
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
$$emit$$"add 0x4,rcx\n\t"
$$emit$$"jle L_end\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
$$emit$$"vmovq xmm0,(rax)\n\t"
$$emit$$"add 0x8,rax\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"jge L_sloop\n\t"
$$emit$$"# L_end:\n\t"
} else {
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
}
15600 %}
15601 ins_encode %{
15602 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15603 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15604 %}
15605 ins_pipe(pipe_slow);
15606 %}
15607
15608 // Small constant length ClearArray for AVX512 targets.
15609 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15610 %{
15611 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15612 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15613 match(Set dummy (ClearArray (Binary cnt base) val));
15614 ins_cost(100);
15615 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15616 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15617 ins_encode %{
15618 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15619 %}
15620 ins_pipe(pipe_slow);
15621 %}
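
// The constant-length rule above dispatches to a clear_mem() overload that
// takes the count as an immediate; its low ins_cost(100) steers the matcher
// toward it whenever the predicate accepts the constant.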
15622
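// String compare rules. The LL/UU/LU/UL suffixes name the (str1, str2)
// encodings: L = Latin-1 byte[] elements, U = UTF-16 char[] elements.
// Each rule comes in two flavors: a plain one that passes the knoreg
// sentinel in place of a mask register, and an _evex one (gated on
// avx512vlbw) that supplies a real kReg, so one string_compare() stub
// serves both paths. Note that the UL rules swap the operands at the call
// site (str2/cnt2 first) while keeping the UL encoding constant.
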
15623 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15624 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15625 %{
15626 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15627 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15628 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15629
15630 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15631 ins_encode %{
15632 __ string_compare($str1$$Register, $str2$$Register,
15633 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15634 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15635 %}
15636 ins_pipe( pipe_slow );
15637 %}
15638
15639 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15640 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15641 %{
15642 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15643 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15644 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15645
15646 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15647 ins_encode %{
15648 __ string_compare($str1$$Register, $str2$$Register,
15649 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15650 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15651 %}
15652 ins_pipe( pipe_slow );
15653 %}
15654
15655 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15656 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15657 %{
15658 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15659 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15660 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15661
15662 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15663 ins_encode %{
15664 __ string_compare($str1$$Register, $str2$$Register,
15665 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15666 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15667 %}
15668 ins_pipe( pipe_slow );
15669 %}
15670
15671 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15672 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15673 %{
15674 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15675 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15676 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15677
15678 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15679 ins_encode %{
15680 __ string_compare($str1$$Register, $str2$$Register,
15681 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15682 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15683 %}
15684 ins_pipe( pipe_slow );
15685 %}
15686
15687 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15688 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15689 %{
15690 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15691 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15692 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15693
15694 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15695 ins_encode %{
15696 __ string_compare($str1$$Register, $str2$$Register,
15697 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15698 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15699 %}
15700 ins_pipe( pipe_slow );
15701 %}
15702
15703 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15704 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15705 %{
15706 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15707 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15708 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15709
15710 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15711 ins_encode %{
15712 __ string_compare($str1$$Register, $str2$$Register,
15713 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15714 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15715 %}
15716 ins_pipe( pipe_slow );
15717 %}
15718
15719 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15720 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15721 %{
15722 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15723 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15724 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15725
15726 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15727 ins_encode %{
15728 __ string_compare($str2$$Register, $str1$$Register,
15729 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15730 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15731 %}
15732 ins_pipe( pipe_slow );
15733 %}
15734
15735 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15736 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15737 %{
15738 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15739 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15740 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15741
15742 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15743 ins_encode %{
15744 __ string_compare($str2$$Register, $str1$$Register,
15745 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15746 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15747 %}
15748 ins_pipe( pipe_slow );
15749 %}
15750
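// Substring search rules. When the needle length is a known constant of at
// least 16 byte elements (LL) or 8 char elements (UU/UL), the rules call
// string_indexofC8(), which avoids staging the needle through the stack;
// smaller constants and the non-constant rules (which pass -1 as the
// count) fall back to the general string_indexof() path.
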
15751 // fast search of substring with known size.
15752 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15753 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15754 %{
15755 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15756 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15757 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15758
15759 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15760 ins_encode %{
15761 int icnt2 = (int)$int_cnt2$$constant;
15762 if (icnt2 >= 16) {
// IndexOf for constant substrings with size >= 16 elements
// which don't need to be loaded through the stack.
15765 __ string_indexofC8($str1$$Register, $str2$$Register,
15766 $cnt1$$Register, $cnt2$$Register,
15767 icnt2, $result$$Register,
15768 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15769 } else {
// Small strings are loaded through the stack if they cross a page boundary.
15771 __ string_indexof($str1$$Register, $str2$$Register,
15772 $cnt1$$Register, $cnt2$$Register,
15773 icnt2, $result$$Register,
15774 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15775 }
15776 %}
15777 ins_pipe( pipe_slow );
15778 %}
15779
15780 // fast search of substring with known size.
15781 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15782 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15783 %{
15784 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15785 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15786 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15787
15788 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15789 ins_encode %{
15790 int icnt2 = (int)$int_cnt2$$constant;
15791 if (icnt2 >= 8) {
// IndexOf for constant substrings with size >= 8 elements
// which don't need to be loaded through the stack.
15794 __ string_indexofC8($str1$$Register, $str2$$Register,
15795 $cnt1$$Register, $cnt2$$Register,
15796 icnt2, $result$$Register,
15797 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15798 } else {
// Small strings are loaded through the stack if they cross a page boundary.
15800 __ string_indexof($str1$$Register, $str2$$Register,
15801 $cnt1$$Register, $cnt2$$Register,
15802 icnt2, $result$$Register,
15803 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15804 }
15805 %}
15806 ins_pipe( pipe_slow );
15807 %}
15808
15809 // fast search of substring with known size.
15810 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15811 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15812 %{
15813 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15814 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15815 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15816
15817 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15818 ins_encode %{
15819 int icnt2 = (int)$int_cnt2$$constant;
15820 if (icnt2 >= 8) {
// IndexOf for constant substrings with size >= 8 elements
// which don't need to be loaded through the stack.
15823 __ string_indexofC8($str1$$Register, $str2$$Register,
15824 $cnt1$$Register, $cnt2$$Register,
15825 icnt2, $result$$Register,
15826 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15827 } else {
// Small strings are loaded through the stack if they cross a page boundary.
15829 __ string_indexof($str1$$Register, $str2$$Register,
15830 $cnt1$$Register, $cnt2$$Register,
15831 icnt2, $result$$Register,
15832 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15833 }
15834 %}
15835 ins_pipe( pipe_slow );
15836 %}
15837
15838 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15839 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15840 %{
15841 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15842 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15843 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15844
15845 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15846 ins_encode %{
15847 __ string_indexof($str1$$Register, $str2$$Register,
15848 $cnt1$$Register, $cnt2$$Register,
15849 (-1), $result$$Register,
15850 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15851 %}
15852 ins_pipe( pipe_slow );
15853 %}
15854
15855 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15856 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15857 %{
15858 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15859 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15860 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15861
15862 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15863 ins_encode %{
15864 __ string_indexof($str1$$Register, $str2$$Register,
15865 $cnt1$$Register, $cnt2$$Register,
15866 (-1), $result$$Register,
15867 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15868 %}
15869 ins_pipe( pipe_slow );
15870 %}
15871
15872 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15873 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15874 %{
15875 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15876 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15877 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15878
15879 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15880 ins_encode %{
15881 __ string_indexof($str1$$Register, $str2$$Register,
15882 $cnt1$$Register, $cnt2$$Register,
15883 (-1), $result$$Register,
15884 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15885 %}
15886 ins_pipe( pipe_slow );
15887 %}
15888
15889 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15890 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15891 %{
15892 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15893 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15894 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15895 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15896 ins_encode %{
15897 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15898 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15899 %}
15900 ins_pipe( pipe_slow );
15901 %}
15902
15903 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15904 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15905 %{
15906 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15907 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15908 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15909 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15910 ins_encode %{
15911 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15912 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15913 %}
15914 ins_pipe( pipe_slow );
15915 %}
15916
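// StrEquals and AryEq both funnel into arrays_equals(): the leading boolean
// distinguishes array callers from string callers, the trailing
// "/* char */" flag selects the element width, and knoreg again stands in
// for the mask register on non-EVEX targets.
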
15917 // fast string equals
15918 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15919 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15920 %{
15921 predicate(!VM_Version::supports_avx512vlbw());
15922 match(Set result (StrEquals (Binary str1 str2) cnt));
15923 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15924
15925 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15926 ins_encode %{
15927 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15928 $cnt$$Register, $result$$Register, $tmp3$$Register,
15929 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15930 %}
15931 ins_pipe( pipe_slow );
15932 %}
15933
15934 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15935 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15936 %{
15937 predicate(VM_Version::supports_avx512vlbw());
15938 match(Set result (StrEquals (Binary str1 str2) cnt));
15939 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15940
15941 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15942 ins_encode %{
15943 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15944 $cnt$$Register, $result$$Register, $tmp3$$Register,
15945 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15946 %}
15947 ins_pipe( pipe_slow );
15948 %}
15949
15950 // fast array equals
15951 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15952 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15953 %{
15954 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15955 match(Set result (AryEq ary1 ary2));
15956 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15957
15958 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15959 ins_encode %{
15960 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15961 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15962 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15963 %}
15964 ins_pipe( pipe_slow );
15965 %}
15966
15967 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15968 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15969 %{
15970 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15971 match(Set result (AryEq ary1 ary2));
15972 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15973
15974 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15975 ins_encode %{
15976 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15977 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15978 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15979 %}
15980 ins_pipe( pipe_slow );
15981 %}
15982
15983 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15984 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15985 %{
15986 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15987 match(Set result (AryEq ary1 ary2));
15988 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15989
15990 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15991 ins_encode %{
15992 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15993 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15994 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15995 %}
15996 ins_pipe( pipe_slow );
15997 %}
15998
15999 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16000 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16001 %{
16002 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16003 match(Set result (AryEq ary1 ary2));
16004 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16005
16006 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16007 ins_encode %{
16008 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16009 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16010 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16011 %}
16012 ins_pipe( pipe_slow );
16013 %}
16014
16015 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16016 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16017 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16018 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16019 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16020 %{
16021 predicate(UseAVX >= 2);
16022 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16023 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16024 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16025 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16026 USE basic_type, KILL cr);
16027
16028 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
16029 ins_encode %{
16030 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16031 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16032 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16033 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16034 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16035 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16036 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16037 %}
16038 ins_pipe( pipe_slow );
16039 %}
16040
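// The plain and _evex count_positives rules below are gated on exactly
// complementary predicates (avx512vlbw && bmi2 versus its negation), so
// precisely one of them can match on any given CPU; the same pairing is
// used for the compress/inflate rules that follow.
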
16041 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16043 %{
16044 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16045 match(Set result (CountPositives ary1 len));
16046 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16047
16048 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16049 ins_encode %{
16050 __ count_positives($ary1$$Register, $len$$Register,
16051 $result$$Register, $tmp3$$Register,
16052 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16053 %}
16054 ins_pipe( pipe_slow );
16055 %}
16056
16057 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16059 %{
16060 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16061 match(Set result (CountPositives ary1 len));
16062 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16063
16064 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16065 ins_encode %{
16066 __ count_positives($ary1$$Register, $len$$Register,
16067 $result$$Register, $tmp3$$Register,
16068 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16069 %}
16070 ins_pipe( pipe_slow );
16071 %}
16072
16073 // fast char[] to byte[] compression
16074 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16075 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16076 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16077 match(Set result (StrCompressedCopy src (Binary dst len)));
16078 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16079 USE_KILL len, KILL tmp5, KILL cr);
16080
16081 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16082 ins_encode %{
16083 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16084 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16085 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16086 knoreg, knoreg);
16087 %}
16088 ins_pipe( pipe_slow );
16089 %}
16090
16091 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16092 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16093 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16094 match(Set result (StrCompressedCopy src (Binary dst len)));
16095 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16096 USE_KILL len, KILL tmp5, KILL cr);
16097
16098 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16099 ins_encode %{
16100 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16101 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16102 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16103 $ktmp1$$KRegister, $ktmp2$$KRegister);
16104 %}
16105 ins_pipe( pipe_slow );
16106 %}

// fast byte[] to char[] inflation
16108 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16109 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16110 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16111 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16112 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16113
16114 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16115 ins_encode %{
16116 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16117 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16118 %}
16119 ins_pipe( pipe_slow );
16120 %}
16121
16122 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16123 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16124 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16125 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16126 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16127
16128 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16129 ins_encode %{
16130 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16131 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16132 %}
16133 ins_pipe( pipe_slow );
16134 %}
16135
16136 // encode char[] to byte[] in ISO_8859_1
16137 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16138 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16139 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16140 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16141 match(Set result (EncodeISOArray src (Binary dst len)));
16142 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16143
16144 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16145 ins_encode %{
16146 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16147 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16148 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16149 %}
16150 ins_pipe( pipe_slow );
16151 %}
16152
16153 // encode char[] to byte[] in ASCII
16154 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16155 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16156 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16157 predicate(((EncodeISOArrayNode*)n)->is_ascii());
16158 match(Set result (EncodeISOArray src (Binary dst len)));
16159 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16160
16161 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16162 ins_encode %{
16163 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16164 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16165 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16166 %}
16167 ins_pipe( pipe_slow );
16168 %}
16169
16170 //----------Overflow Math Instructions-----------------------------------------
16171
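// These rules exist only for their flags result: the ALU operation is
// performed and the resulting overflow condition is what the matcher
// consumes (for example, a Java-level Math.addExact(a, b) call, cited here
// only as a hypothetical user of these nodes, compiles down to an addl
// followed by a jump-on-overflow to the slow path). Inputs that the
// instruction overwrites are therefore declared USE_KILL.
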
16172 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16173 %{
16174 match(Set cr (OverflowAddI op1 op2));
16175 effect(DEF cr, USE_KILL op1, USE op2);
16176
16177 format %{ "addl $op1, $op2\t# overflow check int" %}
16178
16179 ins_encode %{
16180 __ addl($op1$$Register, $op2$$Register);
16181 %}
16182 ins_pipe(ialu_reg_reg);
16183 %}
16184
16185 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16186 %{
16187 match(Set cr (OverflowAddI op1 op2));
16188 effect(DEF cr, USE_KILL op1, USE op2);
16189
16190 format %{ "addl $op1, $op2\t# overflow check int" %}
16191
16192 ins_encode %{
16193 __ addl($op1$$Register, $op2$$constant);
16194 %}
16195 ins_pipe(ialu_reg_reg);
16196 %}
16197
16198 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16199 %{
16200 match(Set cr (OverflowAddL op1 op2));
16201 effect(DEF cr, USE_KILL op1, USE op2);
16202
16203 format %{ "addq $op1, $op2\t# overflow check long" %}
16204 ins_encode %{
16205 __ addq($op1$$Register, $op2$$Register);
16206 %}
16207 ins_pipe(ialu_reg_reg);
16208 %}
16209
16210 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16211 %{
16212 match(Set cr (OverflowAddL op1 op2));
16213 effect(DEF cr, USE_KILL op1, USE op2);
16214
16215 format %{ "addq $op1, $op2\t# overflow check long" %}
16216 ins_encode %{
16217 __ addq($op1$$Register, $op2$$constant);
16218 %}
16219 ins_pipe(ialu_reg_reg);
16220 %}
16221
16222 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16223 %{
16224 match(Set cr (OverflowSubI op1 op2));
16225
16226 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16227 ins_encode %{
16228 __ cmpl($op1$$Register, $op2$$Register);
16229 %}
16230 ins_pipe(ialu_reg_reg);
16231 %}
16232
16233 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16234 %{
16235 match(Set cr (OverflowSubI op1 op2));
16236
16237 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16238 ins_encode %{
16239 __ cmpl($op1$$Register, $op2$$constant);
16240 %}
16241 ins_pipe(ialu_reg_reg);
16242 %}
16243
16244 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16245 %{
16246 match(Set cr (OverflowSubL op1 op2));
16247
16248 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16249 ins_encode %{
16250 __ cmpq($op1$$Register, $op2$$Register);
16251 %}
16252 ins_pipe(ialu_reg_reg);
16253 %}
16254
16255 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16256 %{
16257 match(Set cr (OverflowSubL op1 op2));
16258
16259 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16260 ins_encode %{
16261 __ cmpq($op1$$Register, $op2$$constant);
16262 %}
16263 ins_pipe(ialu_reg_reg);
16264 %}
16265
16266 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16267 %{
16268 match(Set cr (OverflowSubI zero op2));
16269 effect(DEF cr, USE_KILL op2);
16270
16271 format %{ "negl $op2\t# overflow check int" %}
16272 ins_encode %{
16273 __ negl($op2$$Register);
16274 %}
16275 ins_pipe(ialu_reg_reg);
16276 %}
16277
16278 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16279 %{
16280 match(Set cr (OverflowSubL zero op2));
16281 effect(DEF cr, USE_KILL op2);
16282
16283 format %{ "negq $op2\t# overflow check long" %}
16284 ins_encode %{
16285 __ negq($op2$$Register);
16286 %}
16287 ins_pipe(ialu_reg_reg);
16288 %}
16289
16290 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16291 %{
16292 match(Set cr (OverflowMulI op1 op2));
16293 effect(DEF cr, USE_KILL op1, USE op2);
16294
16295 format %{ "imull $op1, $op2\t# overflow check int" %}
16296 ins_encode %{
16297 __ imull($op1$$Register, $op2$$Register);
16298 %}
16299 ins_pipe(ialu_reg_reg_alu0);
16300 %}
16301
16302 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16303 %{
16304 match(Set cr (OverflowMulI op1 op2));
16305 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16306
16307 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16308 ins_encode %{
16309 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16310 %}
16311 ins_pipe(ialu_reg_reg_alu0);
16312 %}
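
// The immediate form above writes the three-operand imull into a TEMP so
// that op1 stays live (USE rather than USE_KILL); only the flags matter.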
16313
16314 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16315 %{
16316 match(Set cr (OverflowMulL op1 op2));
16317 effect(DEF cr, USE_KILL op1, USE op2);
16318
16319 format %{ "imulq $op1, $op2\t# overflow check long" %}
16320 ins_encode %{
16321 __ imulq($op1$$Register, $op2$$Register);
16322 %}
16323 ins_pipe(ialu_reg_reg_alu0);
16324 %}
16325
16326 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16327 %{
16328 match(Set cr (OverflowMulL op1 op2));
16329 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16330
16331 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16332 ins_encode %{
16333 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16334 %}
16335 ins_pipe(ialu_reg_reg_alu0);
16336 %}
16337
16338
16339 //----------Control Flow Instructions------------------------------------------
16340 // Signed compare Instructions
16341
16342 // XXX more variants!!
16343 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16344 %{
16345 match(Set cr (CmpI op1 op2));
16346 effect(DEF cr, USE op1, USE op2);
16347
16348 format %{ "cmpl $op1, $op2" %}
16349 ins_encode %{
16350 __ cmpl($op1$$Register, $op2$$Register);
16351 %}
16352 ins_pipe(ialu_cr_reg_reg);
16353 %}
16354
16355 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16356 %{
16357 match(Set cr (CmpI op1 op2));
16358
16359 format %{ "cmpl $op1, $op2" %}
16360 ins_encode %{
16361 __ cmpl($op1$$Register, $op2$$constant);
16362 %}
16363 ins_pipe(ialu_cr_reg_imm);
16364 %}
16365
16366 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16367 %{
16368 match(Set cr (CmpI op1 (LoadI op2)));
16369
16370 ins_cost(500); // XXX
16371 format %{ "cmpl $op1, $op2" %}
16372 ins_encode %{
16373 __ cmpl($op1$$Register, $op2$$Address);
16374 %}
16375 ins_pipe(ialu_cr_reg_mem);
16376 %}
16377
16378 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16379 %{
16380 match(Set cr (CmpI src zero));
16381
16382 format %{ "testl $src, $src" %}
16383 ins_encode %{
16384 __ testl($src$$Register, $src$$Register);
16385 %}
16386 ins_pipe(ialu_cr_reg_imm);
16387 %}
16388
16389 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16390 %{
16391 match(Set cr (CmpI (AndI src con) zero));
16392
16393 format %{ "testl $src, $con" %}
16394 ins_encode %{
16395 __ testl($src$$Register, $con$$constant);
16396 %}
16397 ins_pipe(ialu_cr_reg_imm);
16398 %}
16399
16400 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16401 %{
16402 match(Set cr (CmpI (AndI src1 src2) zero));
16403
16404 format %{ "testl $src1, $src2" %}
16405 ins_encode %{
16406 __ testl($src1$$Register, $src2$$Register);
16407 %}
16408 ins_pipe(ialu_cr_reg_imm);
16409 %}
16410
16411 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16412 %{
16413 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16414
16415 format %{ "testl $src, $mem" %}
16416 ins_encode %{
16417 __ testl($src$$Register, $mem$$Address);
16418 %}
16419 ins_pipe(ialu_cr_reg_mem);
16420 %}
16421
16422 // Unsigned compare Instructions; really, same as signed except they
16423 // produce an rFlagsRegU instead of rFlagsReg.
16424 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16425 %{
16426 match(Set cr (CmpU op1 op2));
16427
16428 format %{ "cmpl $op1, $op2\t# unsigned" %}
16429 ins_encode %{
16430 __ cmpl($op1$$Register, $op2$$Register);
16431 %}
16432 ins_pipe(ialu_cr_reg_reg);
16433 %}
16434
16435 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16436 %{
16437 match(Set cr (CmpU op1 op2));
16438
16439 format %{ "cmpl $op1, $op2\t# unsigned" %}
16440 ins_encode %{
16441 __ cmpl($op1$$Register, $op2$$constant);
16442 %}
16443 ins_pipe(ialu_cr_reg_imm);
16444 %}
16445
16446 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16447 %{
16448 match(Set cr (CmpU op1 (LoadI op2)));
16449
16450 ins_cost(500); // XXX
16451 format %{ "cmpl $op1, $op2\t# unsigned" %}
16452 ins_encode %{
16453 __ cmpl($op1$$Register, $op2$$Address);
16454 %}
16455 ins_pipe(ialu_cr_reg_mem);
16456 %}
16457
16458 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16459 %{
16460 match(Set cr (CmpU src zero));
16461
16462 format %{ "testl $src, $src\t# unsigned" %}
16463 ins_encode %{
16464 __ testl($src$$Register, $src$$Register);
16465 %}
16466 ins_pipe(ialu_cr_reg_imm);
16467 %}
16468
16469 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16470 %{
16471 match(Set cr (CmpP op1 op2));
16472
16473 format %{ "cmpq $op1, $op2\t# ptr" %}
16474 ins_encode %{
16475 __ cmpq($op1$$Register, $op2$$Register);
16476 %}
16477 ins_pipe(ialu_cr_reg_reg);
16478 %}
16479
16480 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16481 %{
16482 match(Set cr (CmpP op1 (LoadP op2)));
16483 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16484
16485 ins_cost(500); // XXX
16486 format %{ "cmpq $op1, $op2\t# ptr" %}
16487 ins_encode %{
16488 __ cmpq($op1$$Register, $op2$$Address);
16489 %}
16490 ins_pipe(ialu_cr_reg_mem);
16491 %}
16492
16493 // XXX this is generalized by compP_rReg_mem???
16494 // Compare raw pointer (used in out-of-heap check).
16495 // Only works because non-oop pointers must be raw pointers
16496 // and raw pointers have no anti-dependencies.
16497 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16498 %{
16499 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16500 n->in(2)->as_Load()->barrier_data() == 0);
16501 match(Set cr (CmpP op1 (LoadP op2)));
16502
16503 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16504 ins_encode %{
16505 __ cmpq($op1$$Register, $op2$$Address);
16506 %}
16507 ins_pipe(ialu_cr_reg_mem);
16508 %}
16509
16510 // This will generate a signed flags result. This should be OK since
16511 // any compare to a zero should be eq/neq.
16512 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16513 %{
16514 match(Set cr (CmpP src zero));
16515
16516 format %{ "testq $src, $src\t# ptr" %}
16517 ins_encode %{
16518 __ testq($src$$Register, $src$$Register);
16519 %}
16520 ins_pipe(ialu_cr_reg_imm);
16521 %}
16522
16523 // This will generate a signed flags result. This should be OK since
16524 // any compare to a zero should be eq/neq.
16525 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16526 %{
16527 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16528 n->in(1)->as_Load()->barrier_data() == 0);
16529 match(Set cr (CmpP (LoadP op) zero));
16530
16531 ins_cost(500); // XXX
16532 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16533 ins_encode %{
16534 __ testq($op$$Address, 0xFFFFFFFF);
16535 %}
16536 ins_pipe(ialu_cr_reg_imm);
16537 %}
16538
16539 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16540 %{
16541 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16542 n->in(1)->as_Load()->barrier_data() == 0);
16543 match(Set cr (CmpP (LoadP mem) zero));
16544
16545 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16546 ins_encode %{
16547 __ cmpq(r12, $mem$$Address);
16548 %}
16549 ins_pipe(ialu_cr_reg_mem);
16550 %}
16551
16552 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16553 %{
16554 match(Set cr (CmpN op1 op2));
16555
16556 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16557 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16558 ins_pipe(ialu_cr_reg_reg);
16559 %}
16560
16561 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16562 %{
16563 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16564 match(Set cr (CmpN src (LoadN mem)));
16565
16566 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16567 ins_encode %{
16568 __ cmpl($src$$Register, $mem$$Address);
16569 %}
16570 ins_pipe(ialu_cr_reg_mem);
16571 %}
16572
16573 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16574 match(Set cr (CmpN op1 op2));
16575
16576 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16577 ins_encode %{
16578 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16579 %}
16580 ins_pipe(ialu_cr_reg_imm);
16581 %}
16582
16583 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16584 %{
16585 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16586 match(Set cr (CmpN src (LoadN mem)));
16587
16588 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16589 ins_encode %{
16590 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16591 %}
16592 ins_pipe(ialu_cr_reg_mem);
16593 %}
16594
16595 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16596 match(Set cr (CmpN op1 op2));
16597
16598 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16599 ins_encode %{
16600 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16601 %}
16602 ins_pipe(ialu_cr_reg_imm);
16603 %}
16604
16605 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16606 %{
16607 predicate(!UseCompactObjectHeaders);
16608 match(Set cr (CmpN src (LoadNKlass mem)));
16609
16610 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16611 ins_encode %{
16612 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16613 %}
16614 ins_pipe(ialu_cr_reg_mem);
16615 %}
16616
16617 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16618 match(Set cr (CmpN src zero));
16619
16620 format %{ "testl $src, $src\t# compressed ptr" %}
16621 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16622 ins_pipe(ialu_cr_reg_imm);
16623 %}
16624
16625 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16626 %{
16627 predicate(CompressedOops::base() != nullptr &&
16628 n->in(1)->as_Load()->barrier_data() == 0);
16629 match(Set cr (CmpN (LoadN mem) zero));
16630
16631 ins_cost(500); // XXX
16632 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16633 ins_encode %{
16634 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16635 %}
16636 ins_pipe(ialu_cr_reg_mem);
16637 %}
16638
16639 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16640 %{
16641 predicate(CompressedOops::base() == nullptr &&
16642 n->in(1)->as_Load()->barrier_data() == 0);
16643 match(Set cr (CmpN (LoadN mem) zero));
16644
16645 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16646 ins_encode %{
16647 __ cmpl(r12, $mem$$Address);
16648 %}
16649 ins_pipe(ialu_cr_reg_mem);
16650 %}
16651
16652 // Yanked all unsigned pointer compare operations.
16653 // Pointer compares are done with CmpP which is already unsigned.
16654
16655 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16656 %{
16657 match(Set cr (CmpL op1 op2));
16658
16659 format %{ "cmpq $op1, $op2" %}
16660 ins_encode %{
16661 __ cmpq($op1$$Register, $op2$$Register);
16662 %}
16663 ins_pipe(ialu_cr_reg_reg);
16664 %}
16665
16666 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16667 %{
16668 match(Set cr (CmpL op1 op2));
16669
16670 format %{ "cmpq $op1, $op2" %}
16671 ins_encode %{
16672 __ cmpq($op1$$Register, $op2$$constant);
16673 %}
16674 ins_pipe(ialu_cr_reg_imm);
16675 %}
16676
16677 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16678 %{
16679 match(Set cr (CmpL op1 (LoadL op2)));
16680
16681 format %{ "cmpq $op1, $op2" %}
16682 ins_encode %{
16683 __ cmpq($op1$$Register, $op2$$Address);
16684 %}
16685 ins_pipe(ialu_cr_reg_mem);
16686 %}
16687
16688 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16689 %{
16690 match(Set cr (CmpL src zero));
16691
16692 format %{ "testq $src, $src" %}
16693 ins_encode %{
16694 __ testq($src$$Register, $src$$Register);
16695 %}
16696 ins_pipe(ialu_cr_reg_imm);
16697 %}
16698
16699 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16700 %{
16701 match(Set cr (CmpL (AndL src con) zero));
16702
16703 format %{ "testq $src, $con\t# long" %}
16704 ins_encode %{
16705 __ testq($src$$Register, $con$$constant);
16706 %}
16707 ins_pipe(ialu_cr_reg_imm);
16708 %}
16709
16710 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16711 %{
16712 match(Set cr (CmpL (AndL src1 src2) zero));
16713
16714 format %{ "testq $src1, $src2\t# long" %}
16715 ins_encode %{
16716 __ testq($src1$$Register, $src2$$Register);
16717 %}
16718 ins_pipe(ialu_cr_reg_imm);
16719 %}
16720
16721 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16722 %{
16723 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16724
16725 format %{ "testq $src, $mem" %}
16726 ins_encode %{
16727 __ testq($src$$Register, $mem$$Address);
16728 %}
16729 ins_pipe(ialu_cr_reg_mem);
16730 %}
16731
16732 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16733 %{
16734 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16735
16736 format %{ "testq $src, $mem" %}
16737 ins_encode %{
16738 __ testq($src$$Register, $mem$$Address);
16739 %}
16740 ins_pipe(ialu_cr_reg_mem);
16741 %}
16742
16743 // Manifest a CmpU result in an integer register. Very painful.
16744 // This is the test to avoid.
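// The emitted sequence materializes the three-way result as -1/0/+1:
// start with -1, branch past the setcc when the first operand compares
// lower, otherwise setcc(notZero) yields 0 on equality and 1 on greater.
// The two rules that follow use the same shape for CmpL3 and CmpUL3.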
16745 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16746 %{
16747 match(Set dst (CmpU3 src1 src2));
16748 effect(KILL flags);
16749
16750 ins_cost(275); // XXX
16751 format %{ "cmpl $src1, $src2\t# CmpL3\n\t"
16752 "movl $dst, -1\n\t"
16753 "jb,u done\n\t"
16754 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16755 "done:" %}
16756 ins_encode %{
16757 Label done;
16758 __ cmpl($src1$$Register, $src2$$Register);
16759 __ movl($dst$$Register, -1);
16760 __ jccb(Assembler::below, done);
16761 __ setcc(Assembler::notZero, $dst$$Register);
16762 __ bind(done);
16763 %}
16764 ins_pipe(pipe_slow);
16765 %}
16766
16767 // Manifest a CmpL result in an integer register. Very painful.
16768 // This is the test to avoid.
16769 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16770 %{
16771 match(Set dst (CmpL3 src1 src2));
16772 effect(KILL flags);
16773
16774 ins_cost(275); // XXX
16775 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16776 "movl $dst, -1\n\t"
16777 "jl,s done\n\t"
16778 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16779 "done:" %}
16780 ins_encode %{
16781 Label done;
16782 __ cmpq($src1$$Register, $src2$$Register);
16783 __ movl($dst$$Register, -1);
16784 __ jccb(Assembler::less, done);
16785 __ setcc(Assembler::notZero, $dst$$Register);
16786 __ bind(done);
16787 %}
16788 ins_pipe(pipe_slow);
16789 %}
16790
16791 // Manifest a CmpUL result in an integer register. Very painful.
16792 // This is the test to avoid.
16793 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16794 %{
16795 match(Set dst (CmpUL3 src1 src2));
16796 effect(KILL flags);
16797
16798 ins_cost(275); // XXX
16799 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16800 "movl $dst, -1\n\t"
16801 "jb,u done\n\t"
16802 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16803 "done:" %}
16804 ins_encode %{
16805 Label done;
16806 __ cmpq($src1$$Register, $src2$$Register);
16807 __ movl($dst$$Register, -1);
16808 __ jccb(Assembler::below, done);
16809 __ setcc(Assembler::notZero, $dst$$Register);
16810 __ bind(done);
16811 %}
16812 ins_pipe(pipe_slow);
16813 %}
16814
16815 // Unsigned long compare Instructions; really, same as signed long except they
16816 // produce an rFlagsRegU instead of rFlagsReg.
16817 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16818 %{
16819 match(Set cr (CmpUL op1 op2));
16820
16821 format %{ "cmpq $op1, $op2\t# unsigned" %}
16822 ins_encode %{
16823 __ cmpq($op1$$Register, $op2$$Register);
16824 %}
16825 ins_pipe(ialu_cr_reg_reg);
16826 %}
16827
16828 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16829 %{
16830 match(Set cr (CmpUL op1 op2));
16831
16832 format %{ "cmpq $op1, $op2\t# unsigned" %}
16833 ins_encode %{
16834 __ cmpq($op1$$Register, $op2$$constant);
16835 %}
16836 ins_pipe(ialu_cr_reg_imm);
16837 %}
16838
16839 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16840 %{
16841 match(Set cr (CmpUL op1 (LoadL op2)));
16842
16843 format %{ "cmpq $op1, $op2\t# unsigned" %}
16844 ins_encode %{
16845 __ cmpq($op1$$Register, $op2$$Address);
16846 %}
16847 ins_pipe(ialu_cr_reg_mem);
16848 %}
16849
16850 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16851 %{
16852 match(Set cr (CmpUL src zero));
16853
16854 format %{ "testq $src, $src\t# unsigned" %}
16855 ins_encode %{
16856 __ testq($src$$Register, $src$$Register);
16857 %}
16858 ins_pipe(ialu_cr_reg_imm);
16859 %}
16860
16861 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16862 %{
16863 match(Set cr (CmpI (LoadB mem) imm));
16864
16865 ins_cost(125);
16866 format %{ "cmpb $mem, $imm" %}
16867 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16868 ins_pipe(ialu_cr_reg_mem);
16869 %}
16870
16871 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16872 %{
16873 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16874
16875 ins_cost(125);
16876 format %{ "testb $mem, $imm\t# ubyte" %}
16877 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16878 ins_pipe(ialu_cr_reg_mem);
16879 %}
16880
16881 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16882 %{
16883 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16884
16885 ins_cost(125);
16886 format %{ "testb $mem, $imm\t# byte" %}
16887 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16888 ins_pipe(ialu_cr_reg_mem);
16889 %}
16890
16891 //----------Max and Min--------------------------------------------------------
16892 // Min Instructions
16893
16894 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16895 %{
16896 predicate(!UseAPX);
16897 effect(USE_DEF dst, USE src, USE cr);
16898
16899 format %{ "cmovlgt $dst, $src\t# min" %}
16900 ins_encode %{
16901 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16902 %}
16903 ins_pipe(pipe_cmov_reg);
16904 %}
16905
16906 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16907 %{
16908 predicate(UseAPX);
16909 effect(DEF dst, USE src1, USE src2, USE cr);
16910
16911 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16912 ins_encode %{
16913 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16914 %}
16915 ins_pipe(pipe_cmov_reg);
16916 %}
16917
16918 instruct minI_rReg(rRegI dst, rRegI src)
16919 %{
16920 predicate(!UseAPX);
16921 match(Set dst (MinI dst src));
16922
16923 ins_cost(200);
16924 expand %{
16925 rFlagsReg cr;
16926 compI_rReg(cr, dst, src);
16927 cmovI_reg_g(dst, src, cr);
16928 %}
16929 %}
16930
16931 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16932 %{
16933 predicate(UseAPX);
16934 match(Set dst (MinI src1 src2));
16935 effect(DEF dst, USE src1, USE src2);
16936 flag(PD::Flag_ndd_demotable_opr1);
16937
16938 ins_cost(200);
16939 expand %{
16940 rFlagsReg cr;
16941 compI_rReg(cr, src1, src2);
16942 cmovI_reg_g_ndd(dst, src1, src2, cr);
16943 %}
16944 %}
16945
16946 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16947 %{
16948 predicate(!UseAPX);
16949 effect(USE_DEF dst, USE src, USE cr);
16950
16951 format %{ "cmovllt $dst, $src\t# max" %}
16952 ins_encode %{
16953 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16954 %}
16955 ins_pipe(pipe_cmov_reg);
16956 %}
16957
16958 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16959 %{
16960 predicate(UseAPX);
16961 effect(DEF dst, USE src1, USE src2, USE cr);
16962
16963 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16964 ins_encode %{
16965 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16966 %}
16967 ins_pipe(pipe_cmov_reg);
16968 %}
16969
16970 instruct maxI_rReg(rRegI dst, rRegI src)
16971 %{
16972 predicate(!UseAPX);
16973 match(Set dst (MaxI dst src));
16974
16975 ins_cost(200);
16976 expand %{
16977 rFlagsReg cr;
16978 compI_rReg(cr, dst, src);
16979 cmovI_reg_l(dst, src, cr);
16980 %}
16981 %}
16982
16983 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16984 %{
16985 predicate(UseAPX);
16986 match(Set dst (MaxI src1 src2));
16987 effect(DEF dst, USE src1, USE src2);
16988 flag(PD::Flag_ndd_demotable_opr1);
16989
16990 ins_cost(200);
16991 expand %{
16992 rFlagsReg cr;
16993 compI_rReg(cr, src1, src2);
16994 cmovI_reg_l_ndd(dst, src1, src2, cr);
16995 %}
16996 %}
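
// MinI/MaxI expand branchlessly into a compare plus a conditional move; the
// APX (NDD) variants use the three-operand ecmovl so that the destination
// does not have to alias an input.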
16997
16998 // ============================================================================
16999 // Branch Instructions
17000
17001 // Jump Direct - Label defines a relative address from JMP+1
17002 instruct jmpDir(label labl)
17003 %{
17004 match(Goto);
17005 effect(USE labl);
17006
17007 ins_cost(300);
17008 format %{ "jmp $labl" %}
17009 size(5);
17010 ins_encode %{
17011 Label* L = $labl$$label;
17012 __ jmp(*L, false); // Always long jump
17013 %}
17014 ins_pipe(pipe_jmp);
17015 %}
17016
17017 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17018 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17019 %{
17020 match(If cop cr);
17021 effect(USE labl);
17022
17023 ins_cost(300);
17024 format %{ "j$cop $labl" %}
17025 size(6);
17026 ins_encode %{
17027 Label* L = $labl$$label;
17028 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17029 %}
17030 ins_pipe(pipe_jcc);
17031 %}
17032
17033 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17034 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17035 %{
17036 match(CountedLoopEnd cop cr);
17037 effect(USE labl);
17038
17039 ins_cost(300);
17040 format %{ "j$cop $labl\t# loop end" %}
17041 size(6);
17042 ins_encode %{
17043 Label* L = $labl$$label;
17044 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17045 %}
17046 ins_pipe(pipe_jcc);
17047 %}
17048
17049 // Jump Direct Conditional - using unsigned comparison
17050 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17051 match(If cop cmp);
17052 effect(USE labl);
17053
17054 ins_cost(300);
17055 format %{ "j$cop,u $labl" %}
17056 size(6);
17057 ins_encode %{
17058 Label* L = $labl$$label;
17059 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17060 %}
17061 ins_pipe(pipe_jcc);
17062 %}
17063
17064 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17065 match(If cop cmp);
17066 effect(USE labl);
17067
17068 ins_cost(200);
17069 format %{ "j$cop,u $labl" %}
17070 size(6);
17071 ins_encode %{
17072 Label* L = $labl$$label;
17073 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17074 %}
17075 ins_pipe(pipe_jcc);
17076 %}
17077
17078 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17079 match(If cop cmp);
17080 effect(USE labl);
17081
17082 ins_cost(200);
17083 format %{ $$template
17084 if ($cop$$cmpcode == Assembler::notEqual) {
17085 $$emit$$"jp,u $labl\n\t"
17086 $$emit$$"j$cop,u $labl"
17087 } else {
17088 $$emit$$"jp,u done\n\t"
17089 $$emit$$"j$cop,u $labl\n\t"
17090 $$emit$$"done:"
17091 }
17092 %}
17093 ins_encode %{
17094 Label* l = $labl$$label;
17095 if ($cop$$cmpcode == Assembler::notEqual) {
17096 __ jcc(Assembler::parity, *l, false);
17097 __ jcc(Assembler::notEqual, *l, false);
17098 } else if ($cop$$cmpcode == Assembler::equal) {
17099 Label done;
17100 __ jccb(Assembler::parity, done);
17101 __ jcc(Assembler::equal, *l, false);
17102 __ bind(done);
17103 } else {
17104 ShouldNotReachHere();
17105 }
17106 %}
17107 ins_pipe(pipe_jcc);
17108 %}
17109
17110 // ============================================================================
17111 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
17112 // superklass array for an instance of the superklass. Set a hidden
17113 // internal cache on a hit (cache is checked with exposed code in
17114 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
17115 // encoding ALSO sets flags.
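//
// In rough pseudocode (a sketch of the emitted scan, not the exact code):
//   Klass** p = secondary_supers->data();  int n = secondary_supers->length();
//   while (n-- > 0) {
//     if (*p++ == super) { secondary_super_cache = super; return 0; }  // hit
//   }
//   return non-zero;                                                   // miss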
17116
17117 instruct partialSubtypeCheck(rdi_RegP result,
17118 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17119 rFlagsReg cr)
17120 %{
17121 match(Set result (PartialSubtypeCheck sub super));
17122 predicate(!UseSecondarySupersTable);
17123 effect(KILL rcx, KILL cr);
17124
17125 ins_cost(1100); // slightly larger than the next version
17126 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17127 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17128 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17129 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17130 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
17131 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17132 "xorq $result, $result\t\t Hit: rdi zero\n\t"
17133 "miss:\t" %}
17134
17135 ins_encode %{
17136 Label miss;
17137 // NB: Callers may assume that, when $result is a valid register,
17138 // check_klass_subtype_slow_path_linear sets it to a nonzero
17139 // value.
17140 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17141 $rcx$$Register, $result$$Register,
17142 nullptr, &miss,
17143 /*set_cond_codes:*/ true);
17144 __ xorptr($result$$Register, $result$$Register);
17145 __ bind(miss);
17146 %}
17147
17148 ins_pipe(pipe_slow);
17149 %}
17150
17151 // ============================================================================
17152 // Two versions of hashtable-based partialSubtypeCheck, both used when
17153 // we need to search for a super class in the secondary supers array.
17154 // The first is used when we don't know _a priori_ the class being
17155 // searched for. The second, far more common, is used when we do know:
17156 // this is used for instanceof, checkcast, and any case where C2 can
17157 // determine it by constant propagation.
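//
// For example, a checkcast against a constant, already-loaded super klass
// matches the Const rule below; a super klass only known at runtime falls
// back to the Var rule (illustrative only).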
17158
17159 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17160 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17161 rFlagsReg cr)
17162 %{
17163 match(Set result (PartialSubtypeCheck sub super));
17164 predicate(UseSecondarySupersTable);
17165 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17166
17167 ins_cost(1000);
17168 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17169
17170 ins_encode %{
17171 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17172 $temp3$$Register, $temp4$$Register, $result$$Register);
17173 %}
17174
17175 ins_pipe(pipe_slow);
17176 %}
17177
17178 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17179 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17180 rFlagsReg cr)
17181 %{
17182 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17183 predicate(UseSecondarySupersTable);
17184 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17185
  ins_cost(700); // smaller than the previous version
17187 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17188
17189 ins_encode %{
17190 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17191 if (InlineSecondarySupersTest) {
17192 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17193 $temp3$$Register, $temp4$$Register, $result$$Register,
17194 super_klass_slot);
17195 } else {
17196 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17197 }
17198 %}
17199
17200 ins_pipe(pipe_slow);
17201 %}
17202
17203 // ============================================================================
17204 // Branch Instructions -- short offset versions
17205 //
17206 // These instructions are used to replace jumps of a long offset (the default
17207 // match) with jumps of a shorter offset. These instructions are all tagged
17208 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17209 // match rules in general matching. Instead, the ADLC generates a conversion
17210 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler decides whether a
// branch can use the short form via the is_short_branch_offset() predicate
// in the machine-specific code section of this file.
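//
// For reference, the size() attributes reflect the x86 encodings: jmp rel32
// (E9 cd) is 5 bytes and jcc rel32 (0F 8x cd) is 6, while the short forms
// jmp rel8 (EB cb) and jcc rel8 (7x cb) are 2 bytes each.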
17214
17215 // Jump Direct - Label defines a relative address from JMP+1
17216 instruct jmpDir_short(label labl) %{
17217 match(Goto);
17218 effect(USE labl);
17219
17220 ins_cost(300);
17221 format %{ "jmp,s $labl" %}
17222 size(2);
17223 ins_encode %{
17224 Label* L = $labl$$label;
17225 __ jmpb(*L);
17226 %}
17227 ins_pipe(pipe_jmp);
17228 ins_short_branch(1);
17229 %}
17230
17231 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17232 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17233 match(If cop cr);
17234 effect(USE labl);
17235
17236 ins_cost(300);
17237 format %{ "j$cop,s $labl" %}
17238 size(2);
17239 ins_encode %{
17240 Label* L = $labl$$label;
17241 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17242 %}
17243 ins_pipe(pipe_jcc);
17244 ins_short_branch(1);
17245 %}
17246
17247 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17248 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17249 match(CountedLoopEnd cop cr);
17250 effect(USE labl);
17251
17252 ins_cost(300);
17253 format %{ "j$cop,s $labl\t# loop end" %}
17254 size(2);
17255 ins_encode %{
17256 Label* L = $labl$$label;
17257 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17258 %}
17259 ins_pipe(pipe_jcc);
17260 ins_short_branch(1);
17261 %}
17262
17263 // Jump Direct Conditional - using unsigned comparison
17264 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17265 match(If cop cmp);
17266 effect(USE labl);
17267
17268 ins_cost(300);
17269 format %{ "j$cop,us $labl" %}
17270 size(2);
17271 ins_encode %{
17272 Label* L = $labl$$label;
17273 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17274 %}
17275 ins_pipe(pipe_jcc);
17276 ins_short_branch(1);
17277 %}
17278
17279 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17280 match(If cop cmp);
17281 effect(USE labl);
17282
17283 ins_cost(300);
17284 format %{ "j$cop,us $labl" %}
17285 size(2);
17286 ins_encode %{
17287 Label* L = $labl$$label;
17288 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17289 %}
17290 ins_pipe(pipe_jcc);
17291 ins_short_branch(1);
17292 %}
17293
17294 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17295 match(If cop cmp);
17296 effect(USE labl);
17297
17298 ins_cost(300);
17299 format %{ $$template
17300 if ($cop$$cmpcode == Assembler::notEqual) {
17301 $$emit$$"jp,u,s $labl\n\t"
17302 $$emit$$"j$cop,u,s $labl"
17303 } else {
17304 $$emit$$"jp,u,s done\n\t"
17305 $$emit$$"j$cop,u,s $labl\n\t"
17306 $$emit$$"done:"
17307 }
17308 %}
17309 size(4);
17310 ins_encode %{
17311 Label* l = $labl$$label;
17312 if ($cop$$cmpcode == Assembler::notEqual) {
17313 __ jccb(Assembler::parity, *l);
17314 __ jccb(Assembler::notEqual, *l);
17315 } else if ($cop$$cmpcode == Assembler::equal) {
17316 Label done;
17317 __ jccb(Assembler::parity, done);
17318 __ jccb(Assembler::equal, *l);
17319 __ bind(done);
17320 } else {
17321 ShouldNotReachHere();
17322 }
17323 %}
17324 ins_pipe(pipe_jcc);
17325 ins_short_branch(1);
17326 %}
17327
17328 // ============================================================================
17329 // inlined locking and unlocking
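//
// Both rules set the condition codes: "equal" (ZF set) means the fast path
// succeeded, while "not equal" routes control to the runtime slow path
// (the contract established by MacroAssembler::fast_lock/fast_unlock).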
17330
17331 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17332 match(Set cr (FastLock object box));
17333 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17334 ins_cost(300);
17335 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17336 ins_encode %{
17337 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17338 %}
17339 ins_pipe(pipe_slow);
17340 %}
17341
17342 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17343 match(Set cr (FastUnlock object rax_reg));
17344 effect(TEMP tmp, USE_KILL rax_reg);
17345 ins_cost(300);
17346 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17347 ins_encode %{
17348 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17349 %}
17350 ins_pipe(pipe_slow);
17351 %}
17352
17353
17354 // ============================================================================
17355 // Safepoint Instructions
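//
// The poll reads the per-thread polling page whose address is in the poll
// operand; when a safepoint is requested the VM protects that page, so the
// test below faults and the signal handler brings the thread to a safepoint.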
17356 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17357 %{
17358 match(SafePoint poll);
17359 effect(KILL cr, USE poll);
17360
17361 format %{ "testl rax, [$poll]\t"
17362 "# Safepoint: poll for GC" %}
17363 ins_cost(125);
17364 ins_encode %{
17365 __ relocate(relocInfo::poll_type);
17366 address pre_pc = __ pc();
17367 __ testl(rax, Address($poll$$Register, 0));
17368 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17369 %}
17370 ins_pipe(ialu_reg_mem);
17371 %}
17372
17373 instruct mask_all_evexL(kReg dst, rRegL src) %{
17374 match(Set dst (MaskAll src));
17375 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17376 ins_encode %{
17377 int mask_len = Matcher::vector_length(this);
17378 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17379 %}
17380 ins_pipe( pipe_slow );
17381 %}
17382
17383 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17384 predicate(Matcher::vector_length(n) > 32);
17385 match(Set dst (MaskAll src));
17386 effect(TEMP tmp);
17387 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17388 ins_encode %{
17389 int mask_len = Matcher::vector_length(this);
17390 __ movslq($tmp$$Register, $src$$Register);
17391 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17392 %}
17393 ins_pipe( pipe_slow );
17394 %}
17395
17396 // ============================================================================
17397 // Procedure Call/Return Instructions
17398 // Call Java Static Instruction
17399 // Note: If this code changes, the corresponding ret_addr_offset() and
17400 // compute_padding() functions will have to be adjusted.
17401 instruct CallStaticJavaDirect(method meth) %{
17402 match(CallStaticJava);
17403 effect(USE meth);
17404
17405 ins_cost(300);
17406 format %{ "call,static " %}
17407 opcode(0xE8); /* E8 cd */
17408 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17409 ins_pipe(pipe_slow);
17410 ins_alignment(4);
17411 %}
17412
17413 // Call Java Dynamic Instruction
17414 // Note: If this code changes, the corresponding ret_addr_offset() and
17415 // compute_padding() functions will have to be adjusted.
17416 instruct CallDynamicJavaDirect(method meth)
17417 %{
17418 match(CallDynamicJava);
17419 effect(USE meth);
17420
17421 ins_cost(300);
17422 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17423 "call,dynamic " %}
17424 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17425 ins_pipe(pipe_slow);
17426 ins_alignment(4);
17427 %}
17428
17429 // Call Runtime Instruction
17430 instruct CallRuntimeDirect(method meth)
17431 %{
17432 match(CallRuntime);
17433 effect(USE meth);
17434
17435 ins_cost(300);
17436 format %{ "call,runtime " %}
17437 ins_encode(clear_avx, Java_To_Runtime(meth));
17438 ins_pipe(pipe_slow);
17439 %}
17440
17441 // Call runtime without safepoint
17442 instruct CallLeafDirect(method meth)
17443 %{
17444 match(CallLeaf);
17445 effect(USE meth);
17446
17447 ins_cost(300);
17448 format %{ "call_leaf,runtime " %}
17449 ins_encode(clear_avx, Java_To_Runtime(meth));
17450 ins_pipe(pipe_slow);
17451 %}
17452
17453 // Call runtime without safepoint and with vector arguments
17454 instruct CallLeafDirectVector(method meth)
17455 %{
17456 match(CallLeafVector);
17457 effect(USE meth);
17458
17459 ins_cost(300);
17460 format %{ "call_leaf,vector " %}
17461 ins_encode(Java_To_Runtime(meth));
17462 ins_pipe(pipe_slow);
17463 %}
17464
17465 // Call runtime without safepoint
17466 // entry point is null, target holds the address to call
17467 instruct CallLeafNoFPInDirect(rRegP target)
17468 %{
17469 predicate(n->as_Call()->entry_point() == nullptr);
17470 match(CallLeafNoFP target);
17471
17472 ins_cost(300);
17473 format %{ "call_leaf_nofp,runtime indirect " %}
17474 ins_encode %{
17475 __ call($target$$Register);
17476 %}
17477
17478 ins_pipe(pipe_slow);
17479 %}
17480
17481 // Call runtime without safepoint
17482 instruct CallLeafNoFPDirect(method meth)
17483 %{
17484 predicate(n->as_Call()->entry_point() != nullptr);
17485 match(CallLeafNoFP);
17486 effect(USE meth);
17487
17488 ins_cost(300);
17489 format %{ "call_leaf_nofp,runtime " %}
17490 ins_encode(clear_avx, Java_To_Runtime(meth));
17491 ins_pipe(pipe_slow);
17492 %}
17493
17494 // Return Instruction
17495 // Remove the return address & jump to it.
17496 // Notice: We always emit a nop after a ret to make sure there is room
17497 // for safepoint patching
17498 instruct Ret()
17499 %{
17500 match(Return);
17501
17502 format %{ "ret" %}
17503 ins_encode %{
17504 __ ret(0);
17505 %}
17506 ins_pipe(pipe_jmp);
17507 %}
17508
17509 // Tail Call; Jump from runtime stub to Java code.
17510 // Also known as an 'interprocedural jump'.
17511 // Target of jump will eventually return to caller.
17512 // TailJump below removes the return address.
17513 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17514 // emitted just above the TailCall which has reset rbp to the caller state.
17515 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17516 %{
17517 match(TailCall jump_target method_ptr);
17518
17519 ins_cost(300);
17520 format %{ "jmp $jump_target\t# rbx holds method" %}
17521 ins_encode %{
17522 __ jmp($jump_target$$Register);
17523 %}
17524 ins_pipe(pipe_jmp);
17525 %}
17526
17527 // Tail Jump; remove the return address; jump to target.
17528 // TailCall above leaves the return address around.
17529 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17530 %{
17531 match(TailJump jump_target ex_oop);
17532
17533 ins_cost(300);
17534 format %{ "popq rdx\t# pop return address\n\t"
17535 "jmp $jump_target" %}
17536 ins_encode %{
17537 __ popq(as_Register(RDX_enc));
17538 __ jmp($jump_target$$Register);
17539 %}
17540 ins_pipe(pipe_jmp);
17541 %}
17542
17543 // Forward exception.
17544 instruct ForwardExceptionjmp()
17545 %{
17546 match(ForwardException);
17547
17548 format %{ "jmp forward_exception_stub" %}
17549 ins_encode %{
17550 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17551 %}
17552 ins_pipe(pipe_jmp);
17553 %}
17554
17555 // Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up just
// prior to jumping here. No code is emitted.
17558 instruct CreateException(rax_RegP ex_oop)
17559 %{
17560 match(Set ex_oop (CreateEx));
17561
17562 size(0);
17563 // use the following format syntax
17564 format %{ "# exception oop is in rax; no code emitted" %}
17565 ins_encode();
17566 ins_pipe(empty);
17567 %}
17568
17569 // Rethrow exception:
17570 // The exception oop will come in the first argument position.
17571 // Then JUMP (not call) to the rethrow stub code.
17572 instruct RethrowException()
17573 %{
17574 match(Rethrow);
17575
17576 // use the following format syntax
17577 format %{ "jmp rethrow_stub" %}
17578 ins_encode %{
17579 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17580 %}
17581 ins_pipe(pipe_jmp);
17582 %}
17583
17584 // ============================================================================
17585 // This name is KNOWN by the ADLC and cannot be changed.
17586 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17587 // for this guy.
17588 instruct tlsLoadP(r15_RegP dst) %{
17589 match(Set dst (ThreadLocal));
17590 effect(DEF dst);
17591
17592 size(0);
17593 format %{ "# TLS is in R15" %}
17594 ins_encode( /*empty encoding*/ );
17595 ins_pipe(ialu_reg_reg);
17596 %}
17597
17598 instruct addF_reg(regF dst, regF src) %{
17599 predicate(UseAVX == 0);
17600 match(Set dst (AddF dst src));
17601
17602 format %{ "addss $dst, $src" %}
17603 ins_cost(150);
17604 ins_encode %{
17605 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17606 %}
17607 ins_pipe(pipe_slow);
17608 %}
17609
17610 instruct addF_mem(regF dst, memory src) %{
17611 predicate(UseAVX == 0);
17612 match(Set dst (AddF dst (LoadF src)));
17613
17614 format %{ "addss $dst, $src" %}
17615 ins_cost(150);
17616 ins_encode %{
17617 __ addss($dst$$XMMRegister, $src$$Address);
17618 %}
17619 ins_pipe(pipe_slow);
17620 %}
17621
17622 instruct addF_imm(regF dst, immF con) %{
17623 predicate(UseAVX == 0);
17624 match(Set dst (AddF dst con));
17625 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17626 ins_cost(150);
17627 ins_encode %{
17628 __ addss($dst$$XMMRegister, $constantaddress($con));
17629 %}
17630 ins_pipe(pipe_slow);
17631 %}
17632
17633 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17634 predicate(UseAVX > 0);
17635 match(Set dst (AddF src1 src2));
17636
17637 format %{ "vaddss $dst, $src1, $src2" %}
17638 ins_cost(150);
17639 ins_encode %{
17640 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17641 %}
17642 ins_pipe(pipe_slow);
17643 %}
17644
17645 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17646 predicate(UseAVX > 0);
17647 match(Set dst (AddF src1 (LoadF src2)));
17648
17649 format %{ "vaddss $dst, $src1, $src2" %}
17650 ins_cost(150);
17651 ins_encode %{
17652 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17653 %}
17654 ins_pipe(pipe_slow);
17655 %}
17656
17657 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17658 predicate(UseAVX > 0);
17659 match(Set dst (AddF src con));
17660
17661 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17662 ins_cost(150);
17663 ins_encode %{
17664 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17665 %}
17666 ins_pipe(pipe_slow);
17667 %}
17668
17669 instruct addD_reg(regD dst, regD src) %{
17670 predicate(UseAVX == 0);
17671 match(Set dst (AddD dst src));
17672
17673 format %{ "addsd $dst, $src" %}
17674 ins_cost(150);
17675 ins_encode %{
17676 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17677 %}
17678 ins_pipe(pipe_slow);
17679 %}
17680
17681 instruct addD_mem(regD dst, memory src) %{
17682 predicate(UseAVX == 0);
17683 match(Set dst (AddD dst (LoadD src)));
17684
17685 format %{ "addsd $dst, $src" %}
17686 ins_cost(150);
17687 ins_encode %{
17688 __ addsd($dst$$XMMRegister, $src$$Address);
17689 %}
17690 ins_pipe(pipe_slow);
17691 %}
17692
17693 instruct addD_imm(regD dst, immD con) %{
17694 predicate(UseAVX == 0);
17695 match(Set dst (AddD dst con));
17696 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17697 ins_cost(150);
17698 ins_encode %{
17699 __ addsd($dst$$XMMRegister, $constantaddress($con));
17700 %}
17701 ins_pipe(pipe_slow);
17702 %}
17703
17704 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17705 predicate(UseAVX > 0);
17706 match(Set dst (AddD src1 src2));
17707
17708 format %{ "vaddsd $dst, $src1, $src2" %}
17709 ins_cost(150);
17710 ins_encode %{
17711 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17712 %}
17713 ins_pipe(pipe_slow);
17714 %}
17715
17716 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17717 predicate(UseAVX > 0);
17718 match(Set dst (AddD src1 (LoadD src2)));
17719
17720 format %{ "vaddsd $dst, $src1, $src2" %}
17721 ins_cost(150);
17722 ins_encode %{
17723 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17724 %}
17725 ins_pipe(pipe_slow);
17726 %}
17727
17728 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17729 predicate(UseAVX > 0);
17730 match(Set dst (AddD src con));
17731
17732 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17733 ins_cost(150);
17734 ins_encode %{
17735 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17736 %}
17737 ins_pipe(pipe_slow);
17738 %}
17739
17740 instruct subF_reg(regF dst, regF src) %{
17741 predicate(UseAVX == 0);
17742 match(Set dst (SubF dst src));
17743
17744 format %{ "subss $dst, $src" %}
17745 ins_cost(150);
17746 ins_encode %{
17747 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17748 %}
17749 ins_pipe(pipe_slow);
17750 %}
17751
17752 instruct subF_mem(regF dst, memory src) %{
17753 predicate(UseAVX == 0);
17754 match(Set dst (SubF dst (LoadF src)));
17755
17756 format %{ "subss $dst, $src" %}
17757 ins_cost(150);
17758 ins_encode %{
17759 __ subss($dst$$XMMRegister, $src$$Address);
17760 %}
17761 ins_pipe(pipe_slow);
17762 %}
17763
17764 instruct subF_imm(regF dst, immF con) %{
17765 predicate(UseAVX == 0);
17766 match(Set dst (SubF dst con));
17767 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17768 ins_cost(150);
17769 ins_encode %{
17770 __ subss($dst$$XMMRegister, $constantaddress($con));
17771 %}
17772 ins_pipe(pipe_slow);
17773 %}
17774
17775 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17776 predicate(UseAVX > 0);
17777 match(Set dst (SubF src1 src2));
17778
17779 format %{ "vsubss $dst, $src1, $src2" %}
17780 ins_cost(150);
17781 ins_encode %{
17782 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17783 %}
17784 ins_pipe(pipe_slow);
17785 %}
17786
17787 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17788 predicate(UseAVX > 0);
17789 match(Set dst (SubF src1 (LoadF src2)));
17790
17791 format %{ "vsubss $dst, $src1, $src2" %}
17792 ins_cost(150);
17793 ins_encode %{
17794 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17795 %}
17796 ins_pipe(pipe_slow);
17797 %}
17798
17799 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17800 predicate(UseAVX > 0);
17801 match(Set dst (SubF src con));
17802
17803 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17804 ins_cost(150);
17805 ins_encode %{
17806 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17807 %}
17808 ins_pipe(pipe_slow);
17809 %}
17810
17811 instruct subD_reg(regD dst, regD src) %{
17812 predicate(UseAVX == 0);
17813 match(Set dst (SubD dst src));
17814
17815 format %{ "subsd $dst, $src" %}
17816 ins_cost(150);
17817 ins_encode %{
17818 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17819 %}
17820 ins_pipe(pipe_slow);
17821 %}
17822
17823 instruct subD_mem(regD dst, memory src) %{
17824 predicate(UseAVX == 0);
17825 match(Set dst (SubD dst (LoadD src)));
17826
17827 format %{ "subsd $dst, $src" %}
17828 ins_cost(150);
17829 ins_encode %{
17830 __ subsd($dst$$XMMRegister, $src$$Address);
17831 %}
17832 ins_pipe(pipe_slow);
17833 %}
17834
17835 instruct subD_imm(regD dst, immD con) %{
17836 predicate(UseAVX == 0);
17837 match(Set dst (SubD dst con));
17838 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17839 ins_cost(150);
17840 ins_encode %{
17841 __ subsd($dst$$XMMRegister, $constantaddress($con));
17842 %}
17843 ins_pipe(pipe_slow);
17844 %}
17845
17846 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17847 predicate(UseAVX > 0);
17848 match(Set dst (SubD src1 src2));
17849
17850 format %{ "vsubsd $dst, $src1, $src2" %}
17851 ins_cost(150);
17852 ins_encode %{
17853 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17854 %}
17855 ins_pipe(pipe_slow);
17856 %}
17857
17858 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17859 predicate(UseAVX > 0);
17860 match(Set dst (SubD src1 (LoadD src2)));
17861
17862 format %{ "vsubsd $dst, $src1, $src2" %}
17863 ins_cost(150);
17864 ins_encode %{
17865 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17866 %}
17867 ins_pipe(pipe_slow);
17868 %}
17869
17870 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17871 predicate(UseAVX > 0);
17872 match(Set dst (SubD src con));
17873
17874 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17875 ins_cost(150);
17876 ins_encode %{
17877 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17878 %}
17879 ins_pipe(pipe_slow);
17880 %}
17881
17882 instruct mulF_reg(regF dst, regF src) %{
17883 predicate(UseAVX == 0);
17884 match(Set dst (MulF dst src));
17885
17886 format %{ "mulss $dst, $src" %}
17887 ins_cost(150);
17888 ins_encode %{
17889 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17890 %}
17891 ins_pipe(pipe_slow);
17892 %}
17893
17894 instruct mulF_mem(regF dst, memory src) %{
17895 predicate(UseAVX == 0);
17896 match(Set dst (MulF dst (LoadF src)));
17897
17898 format %{ "mulss $dst, $src" %}
17899 ins_cost(150);
17900 ins_encode %{
17901 __ mulss($dst$$XMMRegister, $src$$Address);
17902 %}
17903 ins_pipe(pipe_slow);
17904 %}
17905
17906 instruct mulF_imm(regF dst, immF con) %{
17907 predicate(UseAVX == 0);
17908 match(Set dst (MulF dst con));
17909 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17910 ins_cost(150);
17911 ins_encode %{
17912 __ mulss($dst$$XMMRegister, $constantaddress($con));
17913 %}
17914 ins_pipe(pipe_slow);
17915 %}
17916
17917 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17918 predicate(UseAVX > 0);
17919 match(Set dst (MulF src1 src2));
17920
17921 format %{ "vmulss $dst, $src1, $src2" %}
17922 ins_cost(150);
17923 ins_encode %{
17924 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17925 %}
17926 ins_pipe(pipe_slow);
17927 %}
17928
17929 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17930 predicate(UseAVX > 0);
17931 match(Set dst (MulF src1 (LoadF src2)));
17932
17933 format %{ "vmulss $dst, $src1, $src2" %}
17934 ins_cost(150);
17935 ins_encode %{
17936 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17937 %}
17938 ins_pipe(pipe_slow);
17939 %}
17940
17941 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17942 predicate(UseAVX > 0);
17943 match(Set dst (MulF src con));
17944
17945 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17946 ins_cost(150);
17947 ins_encode %{
17948 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17949 %}
17950 ins_pipe(pipe_slow);
17951 %}
17952
17953 instruct mulD_reg(regD dst, regD src) %{
17954 predicate(UseAVX == 0);
17955 match(Set dst (MulD dst src));
17956
17957 format %{ "mulsd $dst, $src" %}
17958 ins_cost(150);
17959 ins_encode %{
17960 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17961 %}
17962 ins_pipe(pipe_slow);
17963 %}
17964
17965 instruct mulD_mem(regD dst, memory src) %{
17966 predicate(UseAVX == 0);
17967 match(Set dst (MulD dst (LoadD src)));
17968
17969 format %{ "mulsd $dst, $src" %}
17970 ins_cost(150);
17971 ins_encode %{
17972 __ mulsd($dst$$XMMRegister, $src$$Address);
17973 %}
17974 ins_pipe(pipe_slow);
17975 %}
17976
17977 instruct mulD_imm(regD dst, immD con) %{
17978 predicate(UseAVX == 0);
17979 match(Set dst (MulD dst con));
17980 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17981 ins_cost(150);
17982 ins_encode %{
17983 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17984 %}
17985 ins_pipe(pipe_slow);
17986 %}
17987
17988 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17989 predicate(UseAVX > 0);
17990 match(Set dst (MulD src1 src2));
17991
17992 format %{ "vmulsd $dst, $src1, $src2" %}
17993 ins_cost(150);
17994 ins_encode %{
17995 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17996 %}
17997 ins_pipe(pipe_slow);
17998 %}
17999
18000 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18001 predicate(UseAVX > 0);
18002 match(Set dst (MulD src1 (LoadD src2)));
18003
18004 format %{ "vmulsd $dst, $src1, $src2" %}
18005 ins_cost(150);
18006 ins_encode %{
18007 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18008 %}
18009 ins_pipe(pipe_slow);
18010 %}
18011
18012 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18013 predicate(UseAVX > 0);
18014 match(Set dst (MulD src con));
18015
18016 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18017 ins_cost(150);
18018 ins_encode %{
18019 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18020 %}
18021 ins_pipe(pipe_slow);
18022 %}
18023
18024 instruct divF_reg(regF dst, regF src) %{
18025 predicate(UseAVX == 0);
18026 match(Set dst (DivF dst src));
18027
18028 format %{ "divss $dst, $src" %}
18029 ins_cost(150);
18030 ins_encode %{
18031 __ divss($dst$$XMMRegister, $src$$XMMRegister);
18032 %}
18033 ins_pipe(pipe_slow);
18034 %}
18035
18036 instruct divF_mem(regF dst, memory src) %{
18037 predicate(UseAVX == 0);
18038 match(Set dst (DivF dst (LoadF src)));
18039
18040 format %{ "divss $dst, $src" %}
18041 ins_cost(150);
18042 ins_encode %{
18043 __ divss($dst$$XMMRegister, $src$$Address);
18044 %}
18045 ins_pipe(pipe_slow);
18046 %}
18047
18048 instruct divF_imm(regF dst, immF con) %{
18049 predicate(UseAVX == 0);
18050 match(Set dst (DivF dst con));
18051 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18052 ins_cost(150);
18053 ins_encode %{
18054 __ divss($dst$$XMMRegister, $constantaddress($con));
18055 %}
18056 ins_pipe(pipe_slow);
18057 %}
18058
18059 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18060 predicate(UseAVX > 0);
18061 match(Set dst (DivF src1 src2));
18062
18063 format %{ "vdivss $dst, $src1, $src2" %}
18064 ins_cost(150);
18065 ins_encode %{
18066 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18067 %}
18068 ins_pipe(pipe_slow);
18069 %}
18070
18071 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18072 predicate(UseAVX > 0);
18073 match(Set dst (DivF src1 (LoadF src2)));
18074
18075 format %{ "vdivss $dst, $src1, $src2" %}
18076 ins_cost(150);
18077 ins_encode %{
18078 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18079 %}
18080 ins_pipe(pipe_slow);
18081 %}
18082
18083 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18084 predicate(UseAVX > 0);
18085 match(Set dst (DivF src con));
18086
18087 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18088 ins_cost(150);
18089 ins_encode %{
18090 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18091 %}
18092 ins_pipe(pipe_slow);
18093 %}
18094
18095 instruct divD_reg(regD dst, regD src) %{
18096 predicate(UseAVX == 0);
18097 match(Set dst (DivD dst src));
18098
18099 format %{ "divsd $dst, $src" %}
18100 ins_cost(150);
18101 ins_encode %{
18102 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18103 %}
18104 ins_pipe(pipe_slow);
18105 %}
18106
18107 instruct divD_mem(regD dst, memory src) %{
18108 predicate(UseAVX == 0);
18109 match(Set dst (DivD dst (LoadD src)));
18110
18111 format %{ "divsd $dst, $src" %}
18112 ins_cost(150);
18113 ins_encode %{
18114 __ divsd($dst$$XMMRegister, $src$$Address);
18115 %}
18116 ins_pipe(pipe_slow);
18117 %}
18118
18119 instruct divD_imm(regD dst, immD con) %{
18120 predicate(UseAVX == 0);
18121 match(Set dst (DivD dst con));
18122 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18123 ins_cost(150);
18124 ins_encode %{
18125 __ divsd($dst$$XMMRegister, $constantaddress($con));
18126 %}
18127 ins_pipe(pipe_slow);
18128 %}
18129
18130 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18131 predicate(UseAVX > 0);
18132 match(Set dst (DivD src1 src2));
18133
18134 format %{ "vdivsd $dst, $src1, $src2" %}
18135 ins_cost(150);
18136 ins_encode %{
18137 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18138 %}
18139 ins_pipe(pipe_slow);
18140 %}
18141
18142 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18143 predicate(UseAVX > 0);
18144 match(Set dst (DivD src1 (LoadD src2)));
18145
18146 format %{ "vdivsd $dst, $src1, $src2" %}
18147 ins_cost(150);
18148 ins_encode %{
18149 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18150 %}
18151 ins_pipe(pipe_slow);
18152 %}
18153
18154 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18155 predicate(UseAVX > 0);
18156 match(Set dst (DivD src con));
18157
18158 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18159 ins_cost(150);
18160 ins_encode %{
18161 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18162 %}
18163 ins_pipe(pipe_slow);
18164 %}
18165
18166 instruct absF_reg(regF dst) %{
18167 predicate(UseAVX == 0);
18168 match(Set dst (AbsF dst));
18169 ins_cost(150);
18170 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18171 ins_encode %{
18172 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18173 %}
18174 ins_pipe(pipe_slow);
18175 %}
18176
18177 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18178 predicate(UseAVX > 0);
18179 match(Set dst (AbsF src));
18180 ins_cost(150);
18181 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18182 ins_encode %{
18183 int vlen_enc = Assembler::AVX_128bit;
18184 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18185 ExternalAddress(float_signmask()), vlen_enc);
18186 %}
18187 ins_pipe(pipe_slow);
18188 %}
18189
18190 instruct absD_reg(regD dst) %{
18191 predicate(UseAVX == 0);
18192 match(Set dst (AbsD dst));
18193 ins_cost(150);
18194 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18195 "# abs double by sign masking" %}
18196 ins_encode %{
18197 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18198 %}
18199 ins_pipe(pipe_slow);
18200 %}
18201
18202 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18203 predicate(UseAVX > 0);
18204 match(Set dst (AbsD src));
18205 ins_cost(150);
18206 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18207 "# abs double by sign masking" %}
18208 ins_encode %{
18209 int vlen_enc = Assembler::AVX_128bit;
18210 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18211 ExternalAddress(double_signmask()), vlen_enc);
18212 %}
18213 ins_pipe(pipe_slow);
18214 %}
18215
18216 instruct negF_reg(regF dst) %{
18217 predicate(UseAVX == 0);
18218 match(Set dst (NegF dst));
18219 ins_cost(150);
18220 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18221 ins_encode %{
18222 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18223 %}
18224 ins_pipe(pipe_slow);
18225 %}
18226
18227 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18228 predicate(UseAVX > 0);
18229 match(Set dst (NegF src));
18230 ins_cost(150);
18231 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18232 ins_encode %{
18233 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18234 ExternalAddress(float_signflip()));
18235 %}
18236 ins_pipe(pipe_slow);
18237 %}
18238
18239 instruct negD_reg(regD dst) %{
18240 predicate(UseAVX == 0);
18241 match(Set dst (NegD dst));
18242 ins_cost(150);
18243 format %{ "xorpd $dst, [0x8000000000000000]\t"
18244 "# neg double by sign flipping" %}
18245 ins_encode %{
18246 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18247 %}
18248 ins_pipe(pipe_slow);
18249 %}
18250
18251 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18252 predicate(UseAVX > 0);
18253 match(Set dst (NegD src));
18254 ins_cost(150);
18255 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18256 "# neg double by sign flipping" %}
18257 ins_encode %{
18258 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18259 ExternalAddress(double_signflip()));
18260 %}
18261 ins_pipe(pipe_slow);
18262 %}
18263
// The sqrtss instruction writes only the low 32 bits of its destination, so
// dst should be pre-initialized for best performance (this avoids a false
// dependency on the stale upper bits). Therefore only the rule where the
// input is pre-loaded into the dst register is defined below.
18266 instruct sqrtF_reg(regF dst) %{
18267 match(Set dst (SqrtF dst));
18268 format %{ "sqrtss $dst, $dst" %}
18269 ins_encode %{
18270 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18271 %}
18272 ins_pipe(pipe_slow);
18273 %}
18274
// The sqrtsd instruction writes only the low 64 bits of its destination, so
// dst should be pre-initialized for best performance; as above, only the rule
// where the input is pre-loaded into the dst register is defined below.
18277 instruct sqrtD_reg(regD dst) %{
18278 match(Set dst (SqrtD dst));
18279 format %{ "sqrtsd $dst, $dst" %}
18280 ins_encode %{
18281 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18282 %}
18283 ins_pipe(pipe_slow);
18284 %}
18285
18286 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18287 effect(TEMP tmp);
18288 match(Set dst (ConvF2HF src));
18289 ins_cost(125);
18290 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18291 ins_encode %{
18292 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18293 %}
18294 ins_pipe( pipe_slow );
18295 %}
18296
18297 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18298 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18299 effect(TEMP ktmp, TEMP rtmp);
18300 match(Set mem (StoreC mem (ConvF2HF src)));
18301 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18302 ins_encode %{
18303 __ movl($rtmp$$Register, 0x1);
18304 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18305 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18306 %}
18307 ins_pipe( pipe_slow );
18308 %}
18309
18310 instruct vconvF2HF(vec dst, vec src) %{
18311 match(Set dst (VectorCastF2HF src));
18312 format %{ "vector_conv_F2HF $dst $src" %}
18313 ins_encode %{
18314 int vlen_enc = vector_length_encoding(this, $src);
18315 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18316 %}
18317 ins_pipe( pipe_slow );
18318 %}
18319
18320 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18321 predicate(n->as_StoreVector()->memory_size() >= 16);
18322 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18323 format %{ "vcvtps2ph $mem,$src" %}
18324 ins_encode %{
18325 int vlen_enc = vector_length_encoding(this, $src);
18326 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18327 %}
18328 ins_pipe( pipe_slow );
18329 %}
18330
18331 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18332 match(Set dst (ConvHF2F src));
18333 format %{ "vcvtph2ps $dst,$src" %}
18334 ins_encode %{
18335 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18336 %}
18337 ins_pipe( pipe_slow );
18338 %}
18339
18340 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18341 match(Set dst (VectorCastHF2F (LoadVector mem)));
18342 format %{ "vcvtph2ps $dst,$mem" %}
18343 ins_encode %{
18344 int vlen_enc = vector_length_encoding(this);
18345 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18346 %}
18347 ins_pipe( pipe_slow );
18348 %}
18349
18350 instruct vconvHF2F(vec dst, vec src) %{
18351 match(Set dst (VectorCastHF2F src));
18352 ins_cost(125);
18353 format %{ "vector_conv_HF2F $dst,$src" %}
18354 ins_encode %{
18355 int vlen_enc = vector_length_encoding(this);
18356 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18357 %}
18358 ins_pipe( pipe_slow );
18359 %}
18360
18361 // ---------------------------------------- VectorReinterpret ------------------------------------
18362 instruct reinterpret_mask(kReg dst) %{
18363 predicate(n->bottom_type()->isa_vectmask() &&
18364 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18365 match(Set dst (VectorReinterpret dst));
18366 ins_cost(125);
18367 format %{ "vector_reinterpret $dst\t!" %}
18368 ins_encode %{
18369 // empty
18370 %}
18371 ins_pipe( pipe_slow );
18372 %}
18373
18374 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18375 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18376 n->bottom_type()->isa_vectmask() &&
18377 n->in(1)->bottom_type()->isa_vectmask() &&
18378 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18379 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18380 match(Set dst (VectorReinterpret src));
18381 effect(TEMP xtmp);
18382 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18383 ins_encode %{
18384 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18385 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18387 int vlen_enc = vector_length_encoding(src_sz);
18388 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18389 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18390 %}
18391 ins_pipe( pipe_slow );
18392 %}
18393
18394 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18395 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18396 n->bottom_type()->isa_vectmask() &&
18397 n->in(1)->bottom_type()->isa_vectmask() &&
18398 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18399 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18400 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18401 match(Set dst (VectorReinterpret src));
18402 effect(TEMP xtmp);
18403 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18404 ins_encode %{
18405 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18406 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18408 int vlen_enc = vector_length_encoding(src_sz);
18409 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18410 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18411 %}
18412 ins_pipe( pipe_slow );
18413 %}
18414
18415 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18416 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18417 n->bottom_type()->isa_vectmask() &&
18418 n->in(1)->bottom_type()->isa_vectmask() &&
18419 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18420 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18421 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18422 match(Set dst (VectorReinterpret src));
18423 effect(TEMP xtmp);
18424 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18425 ins_encode %{
18426 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18427 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18429 int vlen_enc = vector_length_encoding(src_sz);
18430 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18431 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18432 %}
18433 ins_pipe( pipe_slow );
18434 %}
18435
18436 instruct reinterpret(vec dst) %{
18437 predicate(!n->bottom_type()->isa_vectmask() &&
18438 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18439 match(Set dst (VectorReinterpret dst));
18440 ins_cost(125);
18441 format %{ "vector_reinterpret $dst\t!" %}
18442 ins_encode %{
18443 // empty
18444 %}
18445 ins_pipe( pipe_slow );
18446 %}
18447
18448 instruct reinterpret_expand(vec dst, vec src) %{
18449 predicate(UseAVX == 0 &&
18450 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18451 match(Set dst (VectorReinterpret src));
18452 ins_cost(125);
18453 effect(TEMP dst);
18454 format %{ "vector_reinterpret_expand $dst,$src" %}
18455 ins_encode %{
18456 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18457 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18458
18459 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18460 if (src_vlen_in_bytes == 4) {
18461 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18462 } else {
18463 assert(src_vlen_in_bytes == 8, "");
18464 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18465 }
18466 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18467 %}
18468 ins_pipe( pipe_slow );
18469 %}
18470
18471 instruct vreinterpret_expand4(legVec dst, vec src) %{
18472 predicate(UseAVX > 0 &&
18473 !n->bottom_type()->isa_vectmask() &&
18474 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18475 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18476 match(Set dst (VectorReinterpret src));
18477 ins_cost(125);
18478 format %{ "vector_reinterpret_expand $dst,$src" %}
18479 ins_encode %{
18480 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18481 %}
18482 ins_pipe( pipe_slow );
18483 %}
18484
18485
18486 instruct vreinterpret_expand(legVec dst, vec src) %{
18487 predicate(UseAVX > 0 &&
18488 !n->bottom_type()->isa_vectmask() &&
18489 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18490 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18491 match(Set dst (VectorReinterpret src));
18492 ins_cost(125);
18493 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18494 ins_encode %{
18495 switch (Matcher::vector_length_in_bytes(this, $src)) {
18496 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18497 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18498 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18499 default: ShouldNotReachHere();
18500 }
18501 %}
18502 ins_pipe( pipe_slow );
18503 %}
18504
18505 instruct reinterpret_shrink(vec dst, legVec src) %{
18506 predicate(!n->bottom_type()->isa_vectmask() &&
18507 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18508 match(Set dst (VectorReinterpret src));
18509 ins_cost(125);
18510 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18511 ins_encode %{
18512 switch (Matcher::vector_length_in_bytes(this)) {
18513 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18514 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18515 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18516 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18517 default: ShouldNotReachHere();
18518 }
18519 %}
18520 ins_pipe( pipe_slow );
18521 %}
18522
18523 // ----------------------------------------------------------------------------------------------------
18524
18525 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18526 match(Set dst (RoundDoubleMode src rmode));
18527 format %{ "roundsd $dst,$src" %}
18528 ins_cost(150);
18529 ins_encode %{
18530 assert(UseSSE >= 4, "required");
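    // Zero dst first (SSE case, dst != src): roundsd writes only the low
    // 64 bits of dst, so stale upper bits would otherwise carry a false
    // dependency.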
18531 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18532 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18533 }
18534 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18535 %}
18536 ins_pipe(pipe_slow);
18537 %}
18538
18539 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18540 match(Set dst (RoundDoubleMode con rmode));
18541 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18542 ins_cost(150);
18543 ins_encode %{
18544 assert(UseSSE >= 4, "required");
18545 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18546 %}
18547 ins_pipe(pipe_slow);
18548 %}
18549
18550 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18551 predicate(Matcher::vector_length(n) < 8);
18552 match(Set dst (RoundDoubleModeV src rmode));
18553 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18554 ins_encode %{
18555 assert(UseAVX > 0, "required");
18556 int vlen_enc = vector_length_encoding(this);
18557 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18558 %}
18559 ins_pipe( pipe_slow );
18560 %}
18561
18562 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18563 predicate(Matcher::vector_length(n) == 8);
18564 match(Set dst (RoundDoubleModeV src rmode));
18565 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18566 ins_encode %{
18567 assert(UseAVX > 2, "required");
18568 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18569 %}
18570 ins_pipe( pipe_slow );
18571 %}
18572
18573 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18574 predicate(Matcher::vector_length(n) < 8);
18575 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18576 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18577 ins_encode %{
18578 assert(UseAVX > 0, "required");
18579 int vlen_enc = vector_length_encoding(this);
18580 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18581 %}
18582 ins_pipe( pipe_slow );
18583 %}
18584
18585 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18586 predicate(Matcher::vector_length(n) == 8);
18587 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18588 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18589 ins_encode %{
18590 assert(UseAVX > 2, "required");
18591 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18592 %}
18593 ins_pipe( pipe_slow );
18594 %}
18595
18596 instruct onspinwait() %{
18597 match(OnSpinWait);
18598 ins_cost(200);
18599
18600 format %{
18601 $$template
18602 $$emit$$"pause\t! membar_onspinwait"
18603 %}
18604 ins_encode %{
18605 __ pause();
18606 %}
18607 ins_pipe(pipe_slow);
18608 %}
18609
18610 // a * b + c
18611 instruct fmaD_reg(regD a, regD b, regD c) %{
18612 match(Set c (FmaD c (Binary a b)));
18613 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18614 ins_cost(150);
18615 ins_encode %{
18616 assert(UseFMA, "Needs FMA instructions support.");
18617 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18618 %}
18619 ins_pipe( pipe_slow );
18620 %}
18621
18622 // a * b + c
18623 instruct fmaF_reg(regF a, regF b, regF c) %{
18624 match(Set c (FmaF c (Binary a b)));
18625 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18626 ins_cost(150);
18627 ins_encode %{
18628 assert(UseFMA, "Needs FMA instructions support.");
18629 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18630 %}
18631 ins_pipe( pipe_slow );
18632 %}
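
// These rules back the FmaD/FmaF ideal nodes, which C2 produces for the
// Math.fma(a, b, c) intrinsics when UseFMA is enabled.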
18633
18634 // ====================VECTOR INSTRUCTIONS=====================================
18635
18636 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18637 instruct MoveVec2Leg(legVec dst, vec src) %{
18638 match(Set dst src);
18639 format %{ "" %}
18640 ins_encode %{
18641 ShouldNotReachHere();
18642 %}
18643 ins_pipe( fpu_reg_reg );
18644 %}
18645
18646 instruct MoveLeg2Vec(vec dst, legVec src) %{
18647 match(Set dst src);
18648 format %{ "" %}
18649 ins_encode %{
18650 ShouldNotReachHere();
18651 %}
18652 ins_pipe( fpu_reg_reg );
18653 %}
18654
18655 // ============================================================================
18656
18657 // Load vectors generic operand pattern
18658 instruct loadV(vec dst, memory mem) %{
18659 match(Set dst (LoadVector mem));
18660 ins_cost(125);
18661 format %{ "load_vector $dst,$mem" %}
18662 ins_encode %{
18663 BasicType bt = Matcher::vector_element_basic_type(this);
18664 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18665 %}
18666 ins_pipe( pipe_slow );
18667 %}
18668
18669 // Store vectors generic operand pattern.
18670 instruct storeV(memory mem, vec src) %{
18671 match(Set mem (StoreVector mem src));
18672 ins_cost(145);
18673 format %{ "store_vector $mem,$src\n\t" %}
18674 ins_encode %{
18675 switch (Matcher::vector_length_in_bytes(this, $src)) {
18676 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18677 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18678 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18679 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18680 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18681 default: ShouldNotReachHere();
18682 }
18683 %}
18684 ins_pipe( pipe_slow );
18685 %}
18686
18687 // ---------------------------------------- Gather ------------------------------------
18688
18689 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
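//
// Conceptually each gather computes, for a vector of indices idx,
//   for (int i = 0; i < vlen; i++) dst[i] = ((T*)base)[idx[i]];
// with the masked variants loading only the lanes whose mask bit is set.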
18690
18691 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18692 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18693 Matcher::vector_length_in_bytes(n) <= 32);
18694 match(Set dst (LoadVectorGather mem idx));
18695 effect(TEMP dst, TEMP tmp, TEMP mask);
18696 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18697 ins_encode %{
18698 int vlen_enc = vector_length_encoding(this);
18699 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18700 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18701 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18702 __ lea($tmp$$Register, $mem$$Address);
18703 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18704 %}
18705 ins_pipe( pipe_slow );
18706 %}
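
// Note on the rule above: the AVX2 gather forms require an explicit vector
// mask even for an unpredicated gather, and they consume it (each element's
// mask bit is cleared as that element completes). vpcmpeqd with identical
// source and destination operands materializes the required all-ones mask.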
18707
18708
18709 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18710 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18711 !is_subword_type(Matcher::vector_element_basic_type(n)));
18712 match(Set dst (LoadVectorGather mem idx));
18713 effect(TEMP dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18715 ins_encode %{
18716 int vlen_enc = vector_length_encoding(this);
18717 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18718 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18719 __ lea($tmp$$Register, $mem$$Address);
18720 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18721 %}
18722 ins_pipe( pipe_slow );
18723 %}
18724
18725 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18726 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18727 !is_subword_type(Matcher::vector_element_basic_type(n)));
18728 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18729 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18731 ins_encode %{
18732 assert(UseAVX > 2, "sanity");
18733 int vlen_enc = vector_length_encoding(this);
18734 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18735 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
// Note: The gather instruction partially updates the opmask register used
// for predication, hence the mask operand is moved to a temporary first.
18738 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18739 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18740 __ lea($tmp$$Register, $mem$$Address);
18741 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18742 %}
18743 ins_pipe( pipe_slow );
18744 %}
18745
18746 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18747 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18748 match(Set dst (LoadVectorGather mem idx_base));
18749 effect(TEMP tmp, TEMP rtmp);
18750 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18751 ins_encode %{
18752 int vlen_enc = vector_length_encoding(this);
18753 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18754 __ lea($tmp$$Register, $mem$$Address);
18755 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18756 %}
18757 ins_pipe( pipe_slow );
18758 %}
18759
18760 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18761 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18762 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18763 match(Set dst (LoadVectorGather mem idx_base));
18764 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18765 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18766 ins_encode %{
18767 int vlen_enc = vector_length_encoding(this);
18768 int vector_len = Matcher::vector_length(this);
18769 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18770 __ lea($tmp$$Register, $mem$$Address);
18771 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18772 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18773 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18774 %}
18775 ins_pipe( pipe_slow );
18776 %}
18777
18778 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18779 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18780 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18781 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18782 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18783 ins_encode %{
18784 int vlen_enc = vector_length_encoding(this);
18785 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18786 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18787 __ lea($tmp$$Register, $mem$$Address);
18788 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18789 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18790 %}
18791 ins_pipe( pipe_slow );
18792 %}
18793
18794 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18795 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18796 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18797 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18798 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18799 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18800 ins_encode %{
18801 int vlen_enc = vector_length_encoding(this);
18802 int vector_len = Matcher::vector_length(this);
18803 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18804 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18805 __ lea($tmp$$Register, $mem$$Address);
18806 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18807 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18808 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18809 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18810 %}
18811 ins_pipe( pipe_slow );
18812 %}
18813
18814 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18815 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18816 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18817 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18818 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18819 ins_encode %{
18820 int vlen_enc = vector_length_encoding(this);
18821 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18822 __ lea($tmp$$Register, $mem$$Address);
18823 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
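// vpmovmskb yields one mask bit per byte; for short elements keep every other
// bit (pext with 0x55555555) so that there is one bit per 16-bit element.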
18824 if (elem_bt == T_SHORT) {
18825 __ movl($mask_idx$$Register, 0x55555555);
18826 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18827 }
18828 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18829 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18830 %}
18831 ins_pipe( pipe_slow );
18832 %}
18833
18834 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18835 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18836 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18837 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18838 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18839 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18840 ins_encode %{
18841 int vlen_enc = vector_length_encoding(this);
18842 int vector_len = Matcher::vector_length(this);
18843 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18844 __ lea($tmp$$Register, $mem$$Address);
18845 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18846 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
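// As in vgather_masked_subwordLE8B_avx2: compress the byte-granular mask to
// one bit per short element.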
18847 if (elem_bt == T_SHORT) {
18848 __ movl($mask_idx$$Register, 0x55555555);
18849 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18850 }
18851 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18852 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18853 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18854 %}
18855 ins_pipe( pipe_slow );
18856 %}
18857
18858 // ====================Scatter=======================================
18859
18860 // Scatter INT, LONG, FLOAT, DOUBLE
18861
18862 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18863 predicate(UseAVX > 2);
18864 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18865 effect(TEMP tmp, TEMP ktmp);
format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18867 ins_encode %{
18868 int vlen_enc = vector_length_encoding(this, $src);
18869 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18870
18871 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18872 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18873
18874 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18875 __ lea($tmp$$Register, $mem$$Address);
18876 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18877 %}
18878 ins_pipe( pipe_slow );
18879 %}
18880
18881 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18882 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18883 effect(TEMP tmp, TEMP ktmp);
format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
18885 ins_encode %{
18886 int vlen_enc = vector_length_encoding(this, $src);
18887 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18888 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18889 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
// Note: The scatter instruction partially updates the opmask register used
// for predication, hence the mask operand is moved to a temporary first.
18892 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18893 __ lea($tmp$$Register, $mem$$Address);
18894 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18895 %}
18896 ins_pipe( pipe_slow );
18897 %}
18898
18899 // ====================REPLICATE=======================================
18900
// Replicate a byte scalar into a vector
18902 instruct vReplB_reg(vec dst, rRegI src) %{
18903 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18904 match(Set dst (Replicate src));
18905 format %{ "replicateB $dst,$src" %}
18906 ins_encode %{
18907 uint vlen = Matcher::vector_length(this);
18908 if (UseAVX >= 2) {
18909 int vlen_enc = vector_length_encoding(this);
18910 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18911 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18912 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18913 } else {
18914 __ movdl($dst$$XMMRegister, $src$$Register);
18915 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18916 }
18917 } else {
18918 assert(UseAVX < 2, "");
18919 __ movdl($dst$$XMMRegister, $src$$Register);
18920 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18921 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18922 if (vlen >= 16) {
18923 assert(vlen == 16, "");
18924 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18925 }
18926 }
18927 %}
18928 ins_pipe( pipe_slow );
18929 %}
18930
18931 instruct ReplB_mem(vec dst, memory mem) %{
18932 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18933 match(Set dst (Replicate (LoadB mem)));
18934 format %{ "replicateB $dst,$mem" %}
18935 ins_encode %{
18936 int vlen_enc = vector_length_encoding(this);
18937 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18938 %}
18939 ins_pipe( pipe_slow );
18940 %}
18941
18942 // ====================ReplicateS=======================================
18943
18944 instruct vReplS_reg(vec dst, rRegI src) %{
18945 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18946 match(Set dst (Replicate src));
18947 format %{ "replicateS $dst,$src" %}
18948 ins_encode %{
18949 uint vlen = Matcher::vector_length(this);
18950 int vlen_enc = vector_length_encoding(this);
18951 if (UseAVX >= 2) {
18952 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18953 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18954 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18955 } else {
18956 __ movdl($dst$$XMMRegister, $src$$Register);
18957 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18958 }
18959 } else {
18960 assert(UseAVX < 2, "");
18961 __ movdl($dst$$XMMRegister, $src$$Register);
18962 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18963 if (vlen >= 8) {
18964 assert(vlen == 8, "");
18965 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18966 }
18967 }
18968 %}
18969 ins_pipe( pipe_slow );
18970 %}
18971
18972 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18973 match(Set dst (Replicate con));
18974 effect(TEMP rtmp);
18975 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18976 ins_encode %{
18977 int vlen_enc = vector_length_encoding(this);
18978 BasicType bt = Matcher::vector_element_basic_type(this);
18979 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18980 __ movl($rtmp$$Register, $con$$constant);
18981 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18982 %}
18983 ins_pipe( pipe_slow );
18984 %}
18985
18986 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18987 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18988 match(Set dst (Replicate src));
18989 effect(TEMP rtmp);
18990 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18991 ins_encode %{
18992 int vlen_enc = vector_length_encoding(this);
18993 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18994 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18995 %}
18996 ins_pipe( pipe_slow );
18997 %}
18998
18999 instruct ReplS_mem(vec dst, memory mem) %{
19000 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19001 match(Set dst (Replicate (LoadS mem)));
19002 format %{ "replicateS $dst,$mem" %}
19003 ins_encode %{
19004 int vlen_enc = vector_length_encoding(this);
19005 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19006 %}
19007 ins_pipe( pipe_slow );
19008 %}
19009
19010 // ====================ReplicateI=======================================
19011
19012 instruct ReplI_reg(vec dst, rRegI src) %{
19013 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19014 match(Set dst (Replicate src));
19015 format %{ "replicateI $dst,$src" %}
19016 ins_encode %{
19017 uint vlen = Matcher::vector_length(this);
19018 int vlen_enc = vector_length_encoding(this);
19019 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19020 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19021 } else if (VM_Version::supports_avx2()) {
19022 __ movdl($dst$$XMMRegister, $src$$Register);
19023 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19024 } else {
19025 __ movdl($dst$$XMMRegister, $src$$Register);
19026 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19027 }
19028 %}
19029 ins_pipe( pipe_slow );
19030 %}
19031
19032 instruct ReplI_mem(vec dst, memory mem) %{
19033 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19034 match(Set dst (Replicate (LoadI mem)));
19035 format %{ "replicateI $dst,$mem" %}
19036 ins_encode %{
19037 int vlen_enc = vector_length_encoding(this);
19038 if (VM_Version::supports_avx2()) {
19039 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19040 } else if (VM_Version::supports_avx()) {
19041 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19042 } else {
19043 __ movdl($dst$$XMMRegister, $mem$$Address);
19044 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19045 }
19046 %}
19047 ins_pipe( pipe_slow );
19048 %}
19049
19050 instruct ReplI_imm(vec dst, immI con) %{
19051 predicate(Matcher::is_non_long_integral_vector(n));
19052 match(Set dst (Replicate con));
19053 format %{ "replicateI $dst,$con" %}
19054 ins_encode %{
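// The constant table stores only the smallest pattern the available broadcast
// form can expand: 4 bytes with AVX (vbroadcastss), 8 bytes with SSE3
// (movddup), otherwise a full 16-byte vector. Dividing that pattern size by
// the element size yields the replication count passed to vreplicate_imm.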
19055 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19056 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19057 type2aelembytes(Matcher::vector_element_basic_type(this))));
19058 BasicType bt = Matcher::vector_element_basic_type(this);
19059 int vlen = Matcher::vector_length_in_bytes(this);
19060 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19061 %}
19062 ins_pipe( pipe_slow );
19063 %}
19064
// Replicate scalar zero into a vector
19066 instruct ReplI_zero(vec dst, immI_0 zero) %{
19067 predicate(Matcher::is_non_long_integral_vector(n));
19068 match(Set dst (Replicate zero));
19069 format %{ "replicateI $dst,$zero" %}
19070 ins_encode %{
19071 int vlen_enc = vector_length_encoding(this);
19072 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19073 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19074 } else {
19075 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19076 }
19077 %}
19078 ins_pipe( fpu_reg_reg );
19079 %}
19080
19081 instruct ReplI_M1(vec dst, immI_M1 con) %{
19082 predicate(Matcher::is_non_long_integral_vector(n));
19083 match(Set dst (Replicate con));
19084 format %{ "vallones $dst" %}
19085 ins_encode %{
19086 int vector_len = vector_length_encoding(this);
19087 __ vallones($dst$$XMMRegister, vector_len);
19088 %}
19089 ins_pipe( pipe_slow );
19090 %}
19091
19092 // ====================ReplicateL=======================================
19093
// Replicate a long (8-byte) scalar into a vector
19095 instruct ReplL_reg(vec dst, rRegL src) %{
19096 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19097 match(Set dst (Replicate src));
19098 format %{ "replicateL $dst,$src" %}
19099 ins_encode %{
19100 int vlen = Matcher::vector_length(this);
19101 int vlen_enc = vector_length_encoding(this);
19102 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19103 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19104 } else if (VM_Version::supports_avx2()) {
19105 __ movdq($dst$$XMMRegister, $src$$Register);
19106 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19107 } else {
19108 __ movdq($dst$$XMMRegister, $src$$Register);
19109 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19110 }
19111 %}
19112 ins_pipe( pipe_slow );
19113 %}
19114
19115 instruct ReplL_mem(vec dst, memory mem) %{
19116 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19117 match(Set dst (Replicate (LoadL mem)));
19118 format %{ "replicateL $dst,$mem" %}
19119 ins_encode %{
19120 int vlen_enc = vector_length_encoding(this);
19121 if (VM_Version::supports_avx2()) {
19122 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19123 } else if (VM_Version::supports_sse3()) {
19124 __ movddup($dst$$XMMRegister, $mem$$Address);
19125 } else {
19126 __ movq($dst$$XMMRegister, $mem$$Address);
19127 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19128 }
19129 %}
19130 ins_pipe( pipe_slow );
19131 %}
19132
// Replicate a long (8-byte) scalar immediate into a vector by loading it from the constant table.
19134 instruct ReplL_imm(vec dst, immL con) %{
19135 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19136 match(Set dst (Replicate con));
19137 format %{ "replicateL $dst,$con" %}
19138 ins_encode %{
19139 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19140 int vlen = Matcher::vector_length_in_bytes(this);
19141 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19142 %}
19143 ins_pipe( pipe_slow );
19144 %}
19145
19146 instruct ReplL_zero(vec dst, immL0 zero) %{
19147 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19148 match(Set dst (Replicate zero));
19149 format %{ "replicateL $dst,$zero" %}
19150 ins_encode %{
19151 int vlen_enc = vector_length_encoding(this);
19152 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19153 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19154 } else {
19155 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19156 }
19157 %}
19158 ins_pipe( fpu_reg_reg );
19159 %}
19160
19161 instruct ReplL_M1(vec dst, immL_M1 con) %{
19162 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19163 match(Set dst (Replicate con));
19164 format %{ "vallones $dst" %}
19165 ins_encode %{
19166 int vector_len = vector_length_encoding(this);
19167 __ vallones($dst$$XMMRegister, vector_len);
19168 %}
19169 ins_pipe( pipe_slow );
19170 %}
19171
19172 // ====================ReplicateF=======================================
19173
19174 instruct vReplF_reg(vec dst, vlRegF src) %{
19175 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19176 match(Set dst (Replicate src));
19177 format %{ "replicateF $dst,$src" %}
19178 ins_encode %{
19179 uint vlen = Matcher::vector_length(this);
19180 int vlen_enc = vector_length_encoding(this);
19181 if (vlen <= 4) {
19182 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19183 } else if (VM_Version::supports_avx2()) {
19184 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19185 } else {
19186 assert(vlen == 8, "sanity");
19187 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19188 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19189 }
19190 %}
19191 ins_pipe( pipe_slow );
19192 %}
19193
19194 instruct ReplF_reg(vec dst, vlRegF src) %{
19195 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19196 match(Set dst (Replicate src));
19197 format %{ "replicateF $dst,$src" %}
19198 ins_encode %{
19199 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19200 %}
19201 ins_pipe( pipe_slow );
19202 %}
19203
19204 instruct ReplF_mem(vec dst, memory mem) %{
19205 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19206 match(Set dst (Replicate (LoadF mem)));
19207 format %{ "replicateF $dst,$mem" %}
19208 ins_encode %{
19209 int vlen_enc = vector_length_encoding(this);
19210 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19211 %}
19212 ins_pipe( pipe_slow );
19213 %}
19214
// Replicate a float scalar immediate into a vector by loading it from the constant table.
19216 instruct ReplF_imm(vec dst, immF con) %{
19217 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19218 match(Set dst (Replicate con));
19219 format %{ "replicateF $dst,$con" %}
19220 ins_encode %{
19221 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19222 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19223 int vlen = Matcher::vector_length_in_bytes(this);
19224 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19225 %}
19226 ins_pipe( pipe_slow );
19227 %}
19228
19229 instruct ReplF_zero(vec dst, immF0 zero) %{
19230 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19231 match(Set dst (Replicate zero));
19232 format %{ "replicateF $dst,$zero" %}
19233 ins_encode %{
19234 int vlen_enc = vector_length_encoding(this);
19235 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19236 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19237 } else {
19238 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19239 }
19240 %}
19241 ins_pipe( fpu_reg_reg );
19242 %}
19243
19244 // ====================ReplicateD=======================================
19245
// Replicate a double (8-byte) scalar into a vector
19247 instruct vReplD_reg(vec dst, vlRegD src) %{
19248 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19249 match(Set dst (Replicate src));
19250 format %{ "replicateD $dst,$src" %}
19251 ins_encode %{
19252 uint vlen = Matcher::vector_length(this);
19253 int vlen_enc = vector_length_encoding(this);
19254 if (vlen <= 2) {
19255 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19256 } else if (VM_Version::supports_avx2()) {
19257 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19258 } else {
19259 assert(vlen == 4, "sanity");
19260 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19261 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19262 }
19263 %}
19264 ins_pipe( pipe_slow );
19265 %}
19266
19267 instruct ReplD_reg(vec dst, vlRegD src) %{
19268 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19269 match(Set dst (Replicate src));
19270 format %{ "replicateD $dst,$src" %}
19271 ins_encode %{
19272 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19273 %}
19274 ins_pipe( pipe_slow );
19275 %}
19276
19277 instruct ReplD_mem(vec dst, memory mem) %{
19278 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19279 match(Set dst (Replicate (LoadD mem)));
19280 format %{ "replicateD $dst,$mem" %}
19281 ins_encode %{
19282 if (Matcher::vector_length(this) >= 4) {
19283 int vlen_enc = vector_length_encoding(this);
19284 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19285 } else {
19286 __ movddup($dst$$XMMRegister, $mem$$Address);
19287 }
19288 %}
19289 ins_pipe( pipe_slow );
19290 %}
19291
// Replicate a double (8-byte) scalar immediate into a vector by loading it from the constant table.
19293 instruct ReplD_imm(vec dst, immD con) %{
19294 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19295 match(Set dst (Replicate con));
19296 format %{ "replicateD $dst,$con" %}
19297 ins_encode %{
19298 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19299 int vlen = Matcher::vector_length_in_bytes(this);
19300 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19301 %}
19302 ins_pipe( pipe_slow );
19303 %}
19304
19305 instruct ReplD_zero(vec dst, immD0 zero) %{
19306 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19307 match(Set dst (Replicate zero));
19308 format %{ "replicateD $dst,$zero" %}
19309 ins_encode %{
19310 int vlen_enc = vector_length_encoding(this);
19311 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19312 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19313 } else {
19314 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19315 }
19316 %}
19317 ins_pipe( fpu_reg_reg );
19318 %}
19319
19320 // ====================VECTOR INSERT=======================================
19321
19322 instruct insert(vec dst, rRegI val, immU8 idx) %{
19323 predicate(Matcher::vector_length_in_bytes(n) < 32);
19324 match(Set dst (VectorInsert (Binary dst val) idx));
19325 format %{ "vector_insert $dst,$val,$idx" %}
19326 ins_encode %{
19327 assert(UseSSE >= 4, "required");
19328 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19329
19330 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19331
19332 assert(is_integral_type(elem_bt), "");
19333 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19334
19335 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19336 %}
19337 ins_pipe( pipe_slow );
19338 %}
19339
19340 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19341 predicate(Matcher::vector_length_in_bytes(n) == 32);
19342 match(Set dst (VectorInsert (Binary src val) idx));
19343 effect(TEMP vtmp);
19344 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19345 ins_encode %{
19346 int vlen_enc = Assembler::AVX_256bit;
19347 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19348 int elem_per_lane = 16/type2aelembytes(elem_bt);
19349 int log2epr = log2(elem_per_lane);
19350
19351 assert(is_integral_type(elem_bt), "sanity");
19352 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19353
19354 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19355 uint y_idx = ($idx$$constant >> log2epr) & 1;
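// y_idx selects the 128-bit lane, x_idx the element within that lane: the
// lane is extracted, updated with a scalar insert, and written back.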
19356 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19357 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19358 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19359 %}
19360 ins_pipe( pipe_slow );
19361 %}
19362
19363 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19364 predicate(Matcher::vector_length_in_bytes(n) == 64);
19365 match(Set dst (VectorInsert (Binary src val) idx));
19366 effect(TEMP vtmp);
19367 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19368 ins_encode %{
19369 assert(UseAVX > 2, "sanity");
19370
19371 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19372 int elem_per_lane = 16/type2aelembytes(elem_bt);
19373 int log2epr = log2(elem_per_lane);
19374
19375 assert(is_integral_type(elem_bt), "");
19376 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19377
19378 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19379 uint y_idx = ($idx$$constant >> log2epr) & 3;
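// Same index decomposition as insert32, but across four 128-bit lanes (y_idx
// is two bits wide). E.g. for ints (log2epr == 2) and idx == 13: x_idx == 1
// and y_idx == 3, i.e. element 1 of the topmost lane.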
19380 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19381 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19382 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19383 %}
19384 ins_pipe( pipe_slow );
19385 %}
19386
19387 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19388 predicate(Matcher::vector_length(n) == 2);
19389 match(Set dst (VectorInsert (Binary dst val) idx));
19390 format %{ "vector_insert $dst,$val,$idx" %}
19391 ins_encode %{
19392 assert(UseSSE >= 4, "required");
19393 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19394 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19395
19396 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19397 %}
19398 ins_pipe( pipe_slow );
19399 %}
19400
19401 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19402 predicate(Matcher::vector_length(n) == 4);
19403 match(Set dst (VectorInsert (Binary src val) idx));
19404 effect(TEMP vtmp);
19405 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19406 ins_encode %{
19407 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19408 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19409
19410 uint x_idx = $idx$$constant & right_n_bits(1);
19411 uint y_idx = ($idx$$constant >> 1) & 1;
19412 int vlen_enc = Assembler::AVX_256bit;
19413 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19414 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19415 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19416 %}
19417 ins_pipe( pipe_slow );
19418 %}
19419
19420 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19421 predicate(Matcher::vector_length(n) == 8);
19422 match(Set dst (VectorInsert (Binary src val) idx));
19423 effect(TEMP vtmp);
19424 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19425 ins_encode %{
19426 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19427 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19428
19429 uint x_idx = $idx$$constant & right_n_bits(1);
19430 uint y_idx = ($idx$$constant >> 1) & 3;
19431 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19432 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19433 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19434 %}
19435 ins_pipe( pipe_slow );
19436 %}
19437
19438 instruct insertF(vec dst, regF val, immU8 idx) %{
19439 predicate(Matcher::vector_length(n) < 8);
19440 match(Set dst (VectorInsert (Binary dst val) idx));
19441 format %{ "vector_insert $dst,$val,$idx" %}
19442 ins_encode %{
19443 assert(UseSSE >= 4, "sanity");
19444
19445 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19446 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19447
19448 uint x_idx = $idx$$constant & right_n_bits(2);
19449 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19450 %}
19451 ins_pipe( pipe_slow );
19452 %}
19453
19454 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19455 predicate(Matcher::vector_length(n) >= 8);
19456 match(Set dst (VectorInsert (Binary src val) idx));
19457 effect(TEMP vtmp);
19458 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19459 ins_encode %{
19460 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19461 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19462
19463 int vlen = Matcher::vector_length(this);
19464 uint x_idx = $idx$$constant & right_n_bits(2);
19465 if (vlen == 8) {
19466 uint y_idx = ($idx$$constant >> 2) & 1;
19467 int vlen_enc = Assembler::AVX_256bit;
19468 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19469 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19470 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19471 } else {
19472 assert(vlen == 16, "sanity");
19473 uint y_idx = ($idx$$constant >> 2) & 3;
19474 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19475 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19476 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19477 }
19478 %}
19479 ins_pipe( pipe_slow );
19480 %}
19481
19482 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19483 predicate(Matcher::vector_length(n) == 2);
19484 match(Set dst (VectorInsert (Binary dst val) idx));
19485 effect(TEMP tmp);
19486 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19487 ins_encode %{
19488 assert(UseSSE >= 4, "sanity");
19489 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19490 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19491
19492 __ movq($tmp$$Register, $val$$XMMRegister);
19493 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19494 %}
19495 ins_pipe( pipe_slow );
19496 %}
19497
19498 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19499 predicate(Matcher::vector_length(n) == 4);
19500 match(Set dst (VectorInsert (Binary src val) idx));
19501 effect(TEMP vtmp, TEMP tmp);
19502 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19503 ins_encode %{
19504 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19505 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19506
19507 uint x_idx = $idx$$constant & right_n_bits(1);
19508 uint y_idx = ($idx$$constant >> 1) & 1;
19509 int vlen_enc = Assembler::AVX_256bit;
19510 __ movq($tmp$$Register, $val$$XMMRegister);
19511 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19512 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19513 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19514 %}
19515 ins_pipe( pipe_slow );
19516 %}
19517
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19519 predicate(Matcher::vector_length(n) == 8);
19520 match(Set dst (VectorInsert (Binary src val) idx));
19521 effect(TEMP tmp, TEMP vtmp);
format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19523 ins_encode %{
19524 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19525 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19526
19527 uint x_idx = $idx$$constant & right_n_bits(1);
19528 uint y_idx = ($idx$$constant >> 1) & 3;
19529 __ movq($tmp$$Register, $val$$XMMRegister);
19530 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19531 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19532 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19533 %}
19534 ins_pipe( pipe_slow );
19535 %}
19536
19537 // ====================REDUCTION ARITHMETIC=======================================
19538
19539 // =======================Int Reduction==========================================
19540
19541 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19542 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19543 match(Set dst (AddReductionVI src1 src2));
19544 match(Set dst (MulReductionVI src1 src2));
19545 match(Set dst (AndReductionV src1 src2));
19546 match(Set dst ( OrReductionV src1 src2));
19547 match(Set dst (XorReductionV src1 src2));
19548 match(Set dst (MinReductionV src1 src2));
19549 match(Set dst (MaxReductionV src1 src2));
19550 effect(TEMP vtmp1, TEMP vtmp2);
19551 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19552 ins_encode %{
19553 int opcode = this->ideal_Opcode();
19554 int vlen = Matcher::vector_length(this, $src2);
19555 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19556 %}
19557 ins_pipe( pipe_slow );
19558 %}
19559
19560 // =======================Long Reduction==========================================
19561
19562 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19563 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19564 match(Set dst (AddReductionVL src1 src2));
19565 match(Set dst (MulReductionVL src1 src2));
19566 match(Set dst (AndReductionV src1 src2));
19567 match(Set dst ( OrReductionV src1 src2));
19568 match(Set dst (XorReductionV src1 src2));
19569 match(Set dst (MinReductionV src1 src2));
19570 match(Set dst (MaxReductionV src1 src2));
19571 effect(TEMP vtmp1, TEMP vtmp2);
19572 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19573 ins_encode %{
19574 int opcode = this->ideal_Opcode();
19575 int vlen = Matcher::vector_length(this, $src2);
19576 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19577 %}
19578 ins_pipe( pipe_slow );
19579 %}
19580
19581 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19582 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19583 match(Set dst (AddReductionVL src1 src2));
19584 match(Set dst (MulReductionVL src1 src2));
19585 match(Set dst (AndReductionV src1 src2));
19586 match(Set dst ( OrReductionV src1 src2));
19587 match(Set dst (XorReductionV src1 src2));
19588 match(Set dst (MinReductionV src1 src2));
19589 match(Set dst (MaxReductionV src1 src2));
19590 effect(TEMP vtmp1, TEMP vtmp2);
19591 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19592 ins_encode %{
19593 int opcode = this->ideal_Opcode();
19594 int vlen = Matcher::vector_length(this, $src2);
19595 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19596 %}
19597 ins_pipe( pipe_slow );
19598 %}
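
// The two long-reduction rules above differ only in register class: without
// AVX512DQ there is no vpmullq, so the reduction falls back to instructions
// that cannot encode XMM16-XMM31 and the operands are restricted to legVec;
// with AVX512DQ the full EVEX register file is usable.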
19599
19600 // =======================Float Reduction==========================================
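// Strictly ordered reductions fold the elements sequentially, matching Java's
// scalar semantics: ((((acc + v0) + v1) + v2) + v3). The unordered variants
// further below may combine lanes pairwise instead, which is faster but can
// round differently; the Vector API explicitly permits that reassociation.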
19601
19602 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19603 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19604 match(Set dst (AddReductionVF dst src));
19605 match(Set dst (MulReductionVF dst src));
19606 effect(TEMP dst, TEMP vtmp);
19607 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19608 ins_encode %{
19609 int opcode = this->ideal_Opcode();
19610 int vlen = Matcher::vector_length(this, $src);
19611 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19612 %}
19613 ins_pipe( pipe_slow );
19614 %}
19615
19616 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19617 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19618 match(Set dst (AddReductionVF dst src));
19619 match(Set dst (MulReductionVF dst src));
19620 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19621 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19622 ins_encode %{
19623 int opcode = this->ideal_Opcode();
19624 int vlen = Matcher::vector_length(this, $src);
19625 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19626 %}
19627 ins_pipe( pipe_slow );
19628 %}
19629
19630 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19631 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19632 match(Set dst (AddReductionVF dst src));
19633 match(Set dst (MulReductionVF dst src));
19634 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19635 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19636 ins_encode %{
19637 int opcode = this->ideal_Opcode();
19638 int vlen = Matcher::vector_length(this, $src);
19639 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19640 %}
19641 ins_pipe( pipe_slow );
19642 %}
19643
19644
19645 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19646 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19647 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19648 // src1 contains reduction identity
19649 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19650 match(Set dst (AddReductionVF src1 src2));
19651 match(Set dst (MulReductionVF src1 src2));
19652 effect(TEMP dst);
19653 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19654 ins_encode %{
19655 int opcode = this->ideal_Opcode();
19656 int vlen = Matcher::vector_length(this, $src2);
19657 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19658 %}
19659 ins_pipe( pipe_slow );
19660 %}
19661
19662 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19663 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19664 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19665 // src1 contains reduction identity
19666 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19667 match(Set dst (AddReductionVF src1 src2));
19668 match(Set dst (MulReductionVF src1 src2));
19669 effect(TEMP dst, TEMP vtmp);
19670 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19671 ins_encode %{
19672 int opcode = this->ideal_Opcode();
19673 int vlen = Matcher::vector_length(this, $src2);
19674 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19675 %}
19676 ins_pipe( pipe_slow );
19677 %}
19678
19679 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19680 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19681 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19682 // src1 contains reduction identity
19683 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19684 match(Set dst (AddReductionVF src1 src2));
19685 match(Set dst (MulReductionVF src1 src2));
19686 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19687 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19688 ins_encode %{
19689 int opcode = this->ideal_Opcode();
19690 int vlen = Matcher::vector_length(this, $src2);
19691 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19692 %}
19693 ins_pipe( pipe_slow );
19694 %}
19695
19696 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19697 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19698 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19699 // src1 contains reduction identity
19700 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19701 match(Set dst (AddReductionVF src1 src2));
19702 match(Set dst (MulReductionVF src1 src2));
19703 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19704 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19705 ins_encode %{
19706 int opcode = this->ideal_Opcode();
19707 int vlen = Matcher::vector_length(this, $src2);
19708 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19709 %}
19710 ins_pipe( pipe_slow );
19711 %}
19712
19713 // =======================Double Reduction==========================================
19714
19715 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19716 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19717 match(Set dst (AddReductionVD dst src));
19718 match(Set dst (MulReductionVD dst src));
19719 effect(TEMP dst, TEMP vtmp);
19720 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19721 ins_encode %{
19722 int opcode = this->ideal_Opcode();
19723 int vlen = Matcher::vector_length(this, $src);
19724 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19725 %}
19726 ins_pipe( pipe_slow );
19727 %}
19728
19729 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19730 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19731 match(Set dst (AddReductionVD dst src));
19732 match(Set dst (MulReductionVD dst src));
19733 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19734 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19735 ins_encode %{
19736 int opcode = this->ideal_Opcode();
19737 int vlen = Matcher::vector_length(this, $src);
19738 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19739 %}
19740 ins_pipe( pipe_slow );
19741 %}
19742
19743 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19744 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19745 match(Set dst (AddReductionVD dst src));
19746 match(Set dst (MulReductionVD dst src));
19747 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19748 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19749 ins_encode %{
19750 int opcode = this->ideal_Opcode();
19751 int vlen = Matcher::vector_length(this, $src);
19752 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19753 %}
19754 ins_pipe( pipe_slow );
19755 %}
19756
19757 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19758 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19759 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19760 // src1 contains reduction identity
19761 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19762 match(Set dst (AddReductionVD src1 src2));
19763 match(Set dst (MulReductionVD src1 src2));
19764 effect(TEMP dst);
19765 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19766 ins_encode %{
19767 int opcode = this->ideal_Opcode();
19768 int vlen = Matcher::vector_length(this, $src2);
19769 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19770 %}
19771 ins_pipe( pipe_slow );
19772 %}
19773
19774 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19775 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19776 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19777 // src1 contains reduction identity
19778 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19779 match(Set dst (AddReductionVD src1 src2));
19780 match(Set dst (MulReductionVD src1 src2));
19781 effect(TEMP dst, TEMP vtmp);
19782 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19783 ins_encode %{
19784 int opcode = this->ideal_Opcode();
19785 int vlen = Matcher::vector_length(this, $src2);
19786 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19787 %}
19788 ins_pipe( pipe_slow );
19789 %}
19790
19791 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19792 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19793 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19794 // src1 contains reduction identity
19795 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19796 match(Set dst (AddReductionVD src1 src2));
19797 match(Set dst (MulReductionVD src1 src2));
19798 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19799 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19800 ins_encode %{
19801 int opcode = this->ideal_Opcode();
19802 int vlen = Matcher::vector_length(this, $src2);
19803 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19804 %}
19805 ins_pipe( pipe_slow );
19806 %}
19807
19808 // =======================Byte Reduction==========================================
19809
19810 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19811 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19812 match(Set dst (AddReductionVI src1 src2));
19813 match(Set dst (AndReductionV src1 src2));
19814 match(Set dst ( OrReductionV src1 src2));
19815 match(Set dst (XorReductionV src1 src2));
19816 match(Set dst (MinReductionV src1 src2));
19817 match(Set dst (MaxReductionV src1 src2));
19818 effect(TEMP vtmp1, TEMP vtmp2);
19819 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19820 ins_encode %{
19821 int opcode = this->ideal_Opcode();
19822 int vlen = Matcher::vector_length(this, $src2);
19823 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19824 %}
19825 ins_pipe( pipe_slow );
19826 %}
19827
19828 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19829 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19830 match(Set dst (AddReductionVI src1 src2));
19831 match(Set dst (AndReductionV src1 src2));
19832 match(Set dst ( OrReductionV src1 src2));
19833 match(Set dst (XorReductionV src1 src2));
19834 match(Set dst (MinReductionV src1 src2));
19835 match(Set dst (MaxReductionV src1 src2));
19836 effect(TEMP vtmp1, TEMP vtmp2);
19837 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19838 ins_encode %{
19839 int opcode = this->ideal_Opcode();
19840 int vlen = Matcher::vector_length(this, $src2);
19841 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19842 %}
19843 ins_pipe( pipe_slow );
19844 %}
19845
19846 // =======================Short Reduction==========================================
19847
19848 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19849 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19850 match(Set dst (AddReductionVI src1 src2));
19851 match(Set dst (MulReductionVI src1 src2));
19852 match(Set dst (AndReductionV src1 src2));
19853 match(Set dst ( OrReductionV src1 src2));
19854 match(Set dst (XorReductionV src1 src2));
19855 match(Set dst (MinReductionV src1 src2));
19856 match(Set dst (MaxReductionV src1 src2));
19857 effect(TEMP vtmp1, TEMP vtmp2);
19858 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19859 ins_encode %{
19860 int opcode = this->ideal_Opcode();
19861 int vlen = Matcher::vector_length(this, $src2);
19862 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19863 %}
19864 ins_pipe( pipe_slow );
19865 %}
19866
19867 // =======================Mul Reduction==========================================
19868
19869 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19870 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19871 Matcher::vector_length(n->in(2)) <= 32); // src2
19872 match(Set dst (MulReductionVI src1 src2));
19873 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19874 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19875 ins_encode %{
19876 int opcode = this->ideal_Opcode();
19877 int vlen = Matcher::vector_length(this, $src2);
19878 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19879 %}
19880 ins_pipe( pipe_slow );
19881 %}
19882
19883 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19884 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19885 Matcher::vector_length(n->in(2)) == 64); // src2
19886 match(Set dst (MulReductionVI src1 src2));
19887 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19888 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19889 ins_encode %{
19890 int opcode = this->ideal_Opcode();
19891 int vlen = Matcher::vector_length(this, $src2);
19892 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19893 %}
19894 ins_pipe( pipe_slow );
19895 %}
19896
19897 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
19899 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19900 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19901 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19902 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19903 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19904 Matcher::vector_length(n->in(2)) == 2);
19905 match(Set dst (MinReductionV src1 src2));
19906 match(Set dst (MaxReductionV src1 src2));
19907 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19908 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19909 ins_encode %{
19910 assert(UseAVX > 0, "sanity");
19911
19912 int opcode = this->ideal_Opcode();
19913 int vlen = Matcher::vector_length(this, $src2);
19914 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19915 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19916 %}
19917 ins_pipe( pipe_slow );
19918 %}
19919
19920 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19921 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19922 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19923 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19924 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19925 Matcher::vector_length(n->in(2)) >= 4);
19926 match(Set dst (MinReductionV src1 src2));
19927 match(Set dst (MaxReductionV src1 src2));
19928 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19929 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19930 ins_encode %{
19931 assert(UseAVX > 0, "sanity");
19932
19933 int opcode = this->ideal_Opcode();
19934 int vlen = Matcher::vector_length(this, $src2);
19935 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19936 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19937 %}
19938 ins_pipe( pipe_slow );
19939 %}
19940
19941 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19942 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19943 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19944 Matcher::vector_length(n->in(2)) == 2);
19945 match(Set dst (MinReductionV dst src));
19946 match(Set dst (MaxReductionV dst src));
19947 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19948 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19949 ins_encode %{
19950 assert(UseAVX > 0, "sanity");
19951
19952 int opcode = this->ideal_Opcode();
19953 int vlen = Matcher::vector_length(this, $src);
19954 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19955 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19956 %}
19957 ins_pipe( pipe_slow );
19958 %}
19961 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19962 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19963 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19964 Matcher::vector_length(n->in(2)) >= 4);
19965 match(Set dst (MinReductionV dst src));
19966 match(Set dst (MaxReductionV dst src));
19967 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19968 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19969 ins_encode %{
19970 assert(UseAVX > 0, "sanity");
19971
19972 int opcode = this->ideal_Opcode();
19973 int vlen = Matcher::vector_length(this, $src);
19974 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19975 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19976 %}
19977 ins_pipe( pipe_slow );
19978 %}
19979
19980 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19981 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19982 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19983 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19984 Matcher::vector_length(n->in(2)) == 2);
19985 match(Set dst (MinReductionV src1 src2));
19986 match(Set dst (MaxReductionV src1 src2));
19987 effect(TEMP dst, TEMP xtmp1);
19988 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19989 ins_encode %{
19990 int opcode = this->ideal_Opcode();
19991 int vlen = Matcher::vector_length(this, $src2);
19992 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19993 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19994 %}
19995 ins_pipe( pipe_slow );
19996 %}
19997
19998 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19999 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20000 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20001 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20002 Matcher::vector_length(n->in(2)) >= 4);
20003 match(Set dst (MinReductionV src1 src2));
20004 match(Set dst (MaxReductionV src1 src2));
20005 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20006 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20007 ins_encode %{
20008 int opcode = this->ideal_Opcode();
20009 int vlen = Matcher::vector_length(this, $src2);
20010 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20011 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20012 %}
20013 ins_pipe( pipe_slow );
20014 %}
20015
20016 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20017 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20018 Matcher::vector_length(n->in(2)) == 2);
20019 match(Set dst (MinReductionV dst src));
20020 match(Set dst (MaxReductionV dst src));
20021 effect(TEMP dst, TEMP xtmp1);
20022 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20023 ins_encode %{
20024 int opcode = this->ideal_Opcode();
20025 int vlen = Matcher::vector_length(this, $src);
20026 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20027 $xtmp1$$XMMRegister);
20028 %}
20029 ins_pipe( pipe_slow );
20030 %}
20031
20032 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20033 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20034 Matcher::vector_length(n->in(2)) >= 4);
20035 match(Set dst (MinReductionV dst src));
20036 match(Set dst (MaxReductionV dst src));
20037 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20039 ins_encode %{
20040 int opcode = this->ideal_Opcode();
20041 int vlen = Matcher::vector_length(this, $src);
20042 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20043 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20044 %}
20045 ins_pipe( pipe_slow );
20046 %}
20047
//--------------------Min/Max Double Reduction --------------------
20049 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20050 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20051 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20052 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20053 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20054 Matcher::vector_length(n->in(2)) == 2);
20055 match(Set dst (MinReductionV src1 src2));
20056 match(Set dst (MaxReductionV src1 src2));
20057 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20058 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20059 ins_encode %{
20060 assert(UseAVX > 0, "sanity");
20061
20062 int opcode = this->ideal_Opcode();
20063 int vlen = Matcher::vector_length(this, $src2);
20064 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20065 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20066 %}
20067 ins_pipe( pipe_slow );
20068 %}
20069
20070 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20071 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20072 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20073 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20074 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20075 Matcher::vector_length(n->in(2)) >= 4);
20076 match(Set dst (MinReductionV src1 src2));
20077 match(Set dst (MaxReductionV src1 src2));
20078 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20079 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20080 ins_encode %{
20081 assert(UseAVX > 0, "sanity");
20082
20083 int opcode = this->ideal_Opcode();
20084 int vlen = Matcher::vector_length(this, $src2);
20085 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20086 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20087 %}
20088 ins_pipe( pipe_slow );
20089 %}
20092 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20093 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20094 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20095 Matcher::vector_length(n->in(2)) == 2);
20096 match(Set dst (MinReductionV dst src));
20097 match(Set dst (MaxReductionV dst src));
20098 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20099 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20100 ins_encode %{
20101 assert(UseAVX > 0, "sanity");
20102
20103 int opcode = this->ideal_Opcode();
20104 int vlen = Matcher::vector_length(this, $src);
20105 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20106 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20107 %}
20108 ins_pipe( pipe_slow );
20109 %}
20110
20111 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20112 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20113 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20114 Matcher::vector_length(n->in(2)) >= 4);
20115 match(Set dst (MinReductionV dst src));
20116 match(Set dst (MaxReductionV dst src));
20117 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20118 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20119 ins_encode %{
20120 assert(UseAVX > 0, "sanity");
20121
20122 int opcode = this->ideal_Opcode();
20123 int vlen = Matcher::vector_length(this, $src);
20124 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20125 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20126 %}
20127 ins_pipe( pipe_slow );
20128 %}
20129
20130 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20131 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20132 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20133 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20134 Matcher::vector_length(n->in(2)) == 2);
20135 match(Set dst (MinReductionV src1 src2));
20136 match(Set dst (MaxReductionV src1 src2));
20137 effect(TEMP dst, TEMP xtmp1);
20138 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20139 ins_encode %{
20140 int opcode = this->ideal_Opcode();
20141 int vlen = Matcher::vector_length(this, $src2);
20142 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20143 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20144 %}
20145 ins_pipe( pipe_slow );
20146 %}
20147
20148 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20149 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20150 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20151 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20152 Matcher::vector_length(n->in(2)) >= 4);
20153 match(Set dst (MinReductionV src1 src2));
20154 match(Set dst (MaxReductionV src1 src2));
20155 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20156 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20157 ins_encode %{
20158 int opcode = this->ideal_Opcode();
20159 int vlen = Matcher::vector_length(this, $src2);
20160 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20161 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20162 %}
20163 ins_pipe( pipe_slow );
20164 %}
20167 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20168 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20169 Matcher::vector_length(n->in(2)) == 2);
20170 match(Set dst (MinReductionV dst src));
20171 match(Set dst (MaxReductionV dst src));
20172 effect(TEMP dst, TEMP xtmp1);
20173 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20174 ins_encode %{
20175 int opcode = this->ideal_Opcode();
20176 int vlen = Matcher::vector_length(this, $src);
20177 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20178 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20179 %}
20180 ins_pipe( pipe_slow );
20181 %}
20182
20183 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20184 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20185 Matcher::vector_length(n->in(2)) >= 4);
20186 match(Set dst (MinReductionV dst src));
20187 match(Set dst (MaxReductionV dst src));
20188 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20189 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20190 ins_encode %{
20191 int opcode = this->ideal_Opcode();
20192 int vlen = Matcher::vector_length(this, $src);
20193 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20194 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20195 %}
20196 ins_pipe( pipe_slow );
20197 %}
20198
20199 // ====================VECTOR ARITHMETIC=======================================
20200
20201 // --------------------------------- ADD --------------------------------------
20202
20203 // Bytes vector add
20204 instruct vaddB(vec dst, vec src) %{
20205 predicate(UseAVX == 0);
20206 match(Set dst (AddVB dst src));
20207 format %{ "paddb $dst,$src\t! add packedB" %}
20208 ins_encode %{
20209 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20210 %}
20211 ins_pipe( pipe_slow );
20212 %}
20213
20214 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20215 predicate(UseAVX > 0);
20216 match(Set dst (AddVB src1 src2));
20217 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20218 ins_encode %{
20219 int vlen_enc = vector_length_encoding(this);
20220 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20221 %}
20222 ins_pipe( pipe_slow );
20223 %}
20224
20225 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20226 predicate((UseAVX > 0) &&
20227 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20228 match(Set dst (AddVB src (LoadVector mem)));
20229 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20230 ins_encode %{
20231 int vlen_enc = vector_length_encoding(this);
20232 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20233 %}
20234 ins_pipe( pipe_slow );
20235 %}
20236
20237 // Shorts/Chars vector add
20238 instruct vaddS(vec dst, vec src) %{
20239 predicate(UseAVX == 0);
20240 match(Set dst (AddVS dst src));
20241 format %{ "paddw $dst,$src\t! add packedS" %}
20242 ins_encode %{
20243 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20244 %}
20245 ins_pipe( pipe_slow );
20246 %}
20247
20248 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20249 predicate(UseAVX > 0);
20250 match(Set dst (AddVS src1 src2));
20251 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20252 ins_encode %{
20253 int vlen_enc = vector_length_encoding(this);
20254 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20255 %}
20256 ins_pipe( pipe_slow );
20257 %}
20258
20259 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20260 predicate((UseAVX > 0) &&
20261 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20262 match(Set dst (AddVS src (LoadVector mem)));
20263 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20264 ins_encode %{
20265 int vlen_enc = vector_length_encoding(this);
20266 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20267 %}
20268 ins_pipe( pipe_slow );
20269 %}
20270
20271 // Integers vector add
20272 instruct vaddI(vec dst, vec src) %{
20273 predicate(UseAVX == 0);
20274 match(Set dst (AddVI dst src));
20275 format %{ "paddd $dst,$src\t! add packedI" %}
20276 ins_encode %{
20277 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20278 %}
20279 ins_pipe( pipe_slow );
20280 %}
20281
20282 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20283 predicate(UseAVX > 0);
20284 match(Set dst (AddVI src1 src2));
20285 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20286 ins_encode %{
20287 int vlen_enc = vector_length_encoding(this);
20288 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20289 %}
20290 ins_pipe( pipe_slow );
20291 %}
20294 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20295 predicate((UseAVX > 0) &&
20296 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20297 match(Set dst (AddVI src (LoadVector mem)));
20298 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20299 ins_encode %{
20300 int vlen_enc = vector_length_encoding(this);
20301 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20302 %}
20303 ins_pipe( pipe_slow );
20304 %}
20305
20306 // Longs vector add
20307 instruct vaddL(vec dst, vec src) %{
20308 predicate(UseAVX == 0);
20309 match(Set dst (AddVL dst src));
20310 format %{ "paddq $dst,$src\t! add packedL" %}
20311 ins_encode %{
20312 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20313 %}
20314 ins_pipe( pipe_slow );
20315 %}
20316
20317 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20318 predicate(UseAVX > 0);
20319 match(Set dst (AddVL src1 src2));
20320 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20321 ins_encode %{
20322 int vlen_enc = vector_length_encoding(this);
20323 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20324 %}
20325 ins_pipe( pipe_slow );
20326 %}
20327
20328 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20329 predicate((UseAVX > 0) &&
20330 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20331 match(Set dst (AddVL src (LoadVector mem)));
20332 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20333 ins_encode %{
20334 int vlen_enc = vector_length_encoding(this);
20335 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20336 %}
20337 ins_pipe( pipe_slow );
20338 %}
20339
20340 // Floats vector add
20341 instruct vaddF(vec dst, vec src) %{
20342 predicate(UseAVX == 0);
20343 match(Set dst (AddVF dst src));
20344 format %{ "addps $dst,$src\t! add packedF" %}
20345 ins_encode %{
20346 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20347 %}
20348 ins_pipe( pipe_slow );
20349 %}
20350
20351 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20352 predicate(UseAVX > 0);
20353 match(Set dst (AddVF src1 src2));
20354 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20355 ins_encode %{
20356 int vlen_enc = vector_length_encoding(this);
20357 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20358 %}
20359 ins_pipe( pipe_slow );
20360 %}
20361
20362 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20363 predicate((UseAVX > 0) &&
20364 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20365 match(Set dst (AddVF src (LoadVector mem)));
20366 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20367 ins_encode %{
20368 int vlen_enc = vector_length_encoding(this);
20369 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20370 %}
20371 ins_pipe( pipe_slow );
20372 %}
20373
20374 // Doubles vector add
20375 instruct vaddD(vec dst, vec src) %{
20376 predicate(UseAVX == 0);
20377 match(Set dst (AddVD dst src));
20378 format %{ "addpd $dst,$src\t! add packedD" %}
20379 ins_encode %{
20380 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20381 %}
20382 ins_pipe( pipe_slow );
20383 %}
20384
20385 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20386 predicate(UseAVX > 0);
20387 match(Set dst (AddVD src1 src2));
20388 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20389 ins_encode %{
20390 int vlen_enc = vector_length_encoding(this);
20391 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20392 %}
20393 ins_pipe( pipe_slow );
20394 %}
20395
20396 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20397 predicate((UseAVX > 0) &&
20398 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20399 match(Set dst (AddVD src (LoadVector mem)));
20400 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20401 ins_encode %{
20402 int vlen_enc = vector_length_encoding(this);
20403 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20404 %}
20405 ins_pipe( pipe_slow );
20406 %}
20407
20408 // --------------------------------- SUB --------------------------------------
20409
20410 // Bytes vector sub
20411 instruct vsubB(vec dst, vec src) %{
20412 predicate(UseAVX == 0);
20413 match(Set dst (SubVB dst src));
20414 format %{ "psubb $dst,$src\t! sub packedB" %}
20415 ins_encode %{
20416 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20417 %}
20418 ins_pipe( pipe_slow );
20419 %}
20420
20421 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20422 predicate(UseAVX > 0);
20423 match(Set dst (SubVB src1 src2));
20424 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20425 ins_encode %{
20426 int vlen_enc = vector_length_encoding(this);
20427 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20428 %}
20429 ins_pipe( pipe_slow );
20430 %}
20431
20432 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20433 predicate((UseAVX > 0) &&
20434 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20435 match(Set dst (SubVB src (LoadVector mem)));
20436 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20437 ins_encode %{
20438 int vlen_enc = vector_length_encoding(this);
20439 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20440 %}
20441 ins_pipe( pipe_slow );
20442 %}
20443
20444 // Shorts/Chars vector sub
20445 instruct vsubS(vec dst, vec src) %{
20446 predicate(UseAVX == 0);
20447 match(Set dst (SubVS dst src));
20448 format %{ "psubw $dst,$src\t! sub packedS" %}
20449 ins_encode %{
20450 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20451 %}
20452 ins_pipe( pipe_slow );
20453 %}
20456 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20457 predicate(UseAVX > 0);
20458 match(Set dst (SubVS src1 src2));
20459 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20460 ins_encode %{
20461 int vlen_enc = vector_length_encoding(this);
20462 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20463 %}
20464 ins_pipe( pipe_slow );
20465 %}
20466
20467 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20468 predicate((UseAVX > 0) &&
20469 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20470 match(Set dst (SubVS src (LoadVector mem)));
20471 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20472 ins_encode %{
20473 int vlen_enc = vector_length_encoding(this);
20474 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20475 %}
20476 ins_pipe( pipe_slow );
20477 %}
20478
20479 // Integers vector sub
20480 instruct vsubI(vec dst, vec src) %{
20481 predicate(UseAVX == 0);
20482 match(Set dst (SubVI dst src));
20483 format %{ "psubd $dst,$src\t! sub packedI" %}
20484 ins_encode %{
20485 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20486 %}
20487 ins_pipe( pipe_slow );
20488 %}
20489
20490 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20491 predicate(UseAVX > 0);
20492 match(Set dst (SubVI src1 src2));
20493 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20494 ins_encode %{
20495 int vlen_enc = vector_length_encoding(this);
20496 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20497 %}
20498 ins_pipe( pipe_slow );
20499 %}
20500
20501 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20502 predicate((UseAVX > 0) &&
20503 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20504 match(Set dst (SubVI src (LoadVector mem)));
20505 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20506 ins_encode %{
20507 int vlen_enc = vector_length_encoding(this);
20508 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20509 %}
20510 ins_pipe( pipe_slow );
20511 %}
20512
20513 // Longs vector sub
20514 instruct vsubL(vec dst, vec src) %{
20515 predicate(UseAVX == 0);
20516 match(Set dst (SubVL dst src));
20517 format %{ "psubq $dst,$src\t! sub packedL" %}
20518 ins_encode %{
20519 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20520 %}
20521 ins_pipe( pipe_slow );
20522 %}
20523
20524 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20525 predicate(UseAVX > 0);
20526 match(Set dst (SubVL src1 src2));
20527 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20528 ins_encode %{
20529 int vlen_enc = vector_length_encoding(this);
20530 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20531 %}
20532 ins_pipe( pipe_slow );
20533 %}
20536 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20537 predicate((UseAVX > 0) &&
20538 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20539 match(Set dst (SubVL src (LoadVector mem)));
20540 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20541 ins_encode %{
20542 int vlen_enc = vector_length_encoding(this);
20543 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20544 %}
20545 ins_pipe( pipe_slow );
20546 %}
20547
20548 // Floats vector sub
20549 instruct vsubF(vec dst, vec src) %{
20550 predicate(UseAVX == 0);
20551 match(Set dst (SubVF dst src));
20552 format %{ "subps $dst,$src\t! sub packedF" %}
20553 ins_encode %{
20554 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20555 %}
20556 ins_pipe( pipe_slow );
20557 %}
20558
20559 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20560 predicate(UseAVX > 0);
20561 match(Set dst (SubVF src1 src2));
20562 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20563 ins_encode %{
20564 int vlen_enc = vector_length_encoding(this);
20565 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20566 %}
20567 ins_pipe( pipe_slow );
20568 %}
20569
20570 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20571 predicate((UseAVX > 0) &&
20572 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20573 match(Set dst (SubVF src (LoadVector mem)));
20574 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20575 ins_encode %{
20576 int vlen_enc = vector_length_encoding(this);
20577 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20578 %}
20579 ins_pipe( pipe_slow );
20580 %}
20581
20582 // Doubles vector sub
20583 instruct vsubD(vec dst, vec src) %{
20584 predicate(UseAVX == 0);
20585 match(Set dst (SubVD dst src));
20586 format %{ "subpd $dst,$src\t! sub packedD" %}
20587 ins_encode %{
20588 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20589 %}
20590 ins_pipe( pipe_slow );
20591 %}
20592
20593 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20594 predicate(UseAVX > 0);
20595 match(Set dst (SubVD src1 src2));
20596 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20597 ins_encode %{
20598 int vlen_enc = vector_length_encoding(this);
20599 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20600 %}
20601 ins_pipe( pipe_slow );
20602 %}
20603
20604 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20605 predicate((UseAVX > 0) &&
20606 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20607 match(Set dst (SubVD src (LoadVector mem)));
20608 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20609 ins_encode %{
20610 int vlen_enc = vector_length_encoding(this);
20611 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20612 %}
20613 ins_pipe( pipe_slow );
20614 %}
20615
20616 // --------------------------------- MUL --------------------------------------
20617
20618 // Byte vector mul
20619 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20620 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20621 match(Set dst (MulVB src1 src2));
20622 effect(TEMP dst, TEMP xtmp);
20623 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20624 ins_encode %{
20625 assert(UseSSE > 3, "required");
20626 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20627 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20628 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20629 __ psllw($dst$$XMMRegister, 8);
20630 __ psrlw($dst$$XMMRegister, 8);
20631 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20632 %}
20633 ins_pipe( pipe_slow );
20634 %}
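
// A worked example of the widen/narrow trick above (illustrative values):
// there is no 8-bit SIMD multiply, so bytes are sign-extended to 16 bits,
// multiplied with pmullw, and the product truncated back to a byte. For
// a = -1 (0xFF), b = 2: pmovsxbw gives 0xFFFF and 0x0002, pmullw yields
// 0xFFFE, the psllw/psrlw pair clears the high byte (0x00FE), and
// packuswb stores 0xFE == -2. The shift pair guarantees each word is in
// 0..255, so the unsigned-saturating pack is lossless.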
20635
20636 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20637 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20638 match(Set dst (MulVB src1 src2));
20639 effect(TEMP dst, TEMP xtmp);
20640 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20641 ins_encode %{
20642 assert(UseSSE > 3, "required");
20643 // Odd-index elements
20644 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20645 __ psrlw($dst$$XMMRegister, 8);
20646 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20647 __ psrlw($xtmp$$XMMRegister, 8);
20648 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20649 __ psllw($dst$$XMMRegister, 8);
20650 // Even-index elements
20651 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20652 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20653 __ psllw($xtmp$$XMMRegister, 8);
20654 __ psrlw($xtmp$$XMMRegister, 8);
20655 // Combine
20656 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20657 %}
20658 ins_pipe( pipe_slow );
20659 %}
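
// For vectors wider than 8 bytes the SSE path above cannot widen all lanes
// in-register (16 bytes of input would need a 256-bit result), so it
// multiplies the odd and even byte lanes separately within 16-bit lanes:
// psrlw 8 brings the odd bytes down, pmullw multiplies, psllw 8 returns the
// low product bytes to the odd positions; the even bytes multiply in place
// and are masked to their low byte; por merges the two halves.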
20660
20661 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20662 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20663 match(Set dst (MulVB src1 src2));
20664 effect(TEMP xtmp1, TEMP xtmp2);
20665 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20666 ins_encode %{
20667 int vlen_enc = vector_length_encoding(this);
20668 // Odd-index elements
20669 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20670 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20671 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20672 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20673 // Even-index elements
20674 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20675 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20676 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20677 // Combine
20678 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20679 %}
20680 ins_pipe( pipe_slow );
20681 %}
20682
20683 // Shorts/Chars vector mul
20684 instruct vmulS(vec dst, vec src) %{
20685 predicate(UseAVX == 0);
20686 match(Set dst (MulVS dst src));
20687 format %{ "pmullw $dst,$src\t! mul packedS" %}
20688 ins_encode %{
20689 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20690 %}
20691 ins_pipe( pipe_slow );
20692 %}
20693
20694 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20695 predicate(UseAVX > 0);
20696 match(Set dst (MulVS src1 src2));
20697 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20698 ins_encode %{
20699 int vlen_enc = vector_length_encoding(this);
20700 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20701 %}
20702 ins_pipe( pipe_slow );
20703 %}
20704
20705 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20706 predicate((UseAVX > 0) &&
20707 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20708 match(Set dst (MulVS src (LoadVector mem)));
20709 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20710 ins_encode %{
20711 int vlen_enc = vector_length_encoding(this);
20712 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20713 %}
20714 ins_pipe( pipe_slow );
20715 %}
20716
20717 // Integers vector mul
20718 instruct vmulI(vec dst, vec src) %{
20719 predicate(UseAVX == 0);
20720 match(Set dst (MulVI dst src));
20721 format %{ "pmulld $dst,$src\t! mul packedI" %}
20722 ins_encode %{
20723 assert(UseSSE > 3, "required");
20724 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20725 %}
20726 ins_pipe( pipe_slow );
20727 %}
20728
20729 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20730 predicate(UseAVX > 0);
20731 match(Set dst (MulVI src1 src2));
20732 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20733 ins_encode %{
20734 int vlen_enc = vector_length_encoding(this);
20735 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20736 %}
20737 ins_pipe( pipe_slow );
20738 %}
20739
20740 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20741 predicate((UseAVX > 0) &&
20742 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20743 match(Set dst (MulVI src (LoadVector mem)));
20744 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20745 ins_encode %{
20746 int vlen_enc = vector_length_encoding(this);
20747 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20748 %}
20749 ins_pipe( pipe_slow );
20750 %}
20751
20752 // Longs vector mul
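// Pattern selection for 64-bit lane multiply, by decreasing preference:
// a single evpmullq when AVX512DQ is available (with AVX512VL as well for
// vectors shorter than 512 bits); a single vpmuludq/vpmuldq when both
// inputs are known zero-/sign-extended 32-bit values (has_uint_inputs()/
// has_int_inputs(), ins_cost 100); otherwise the multi-instruction
// 32x32-bit decomposition (ins_cost 500).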
20753 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20754 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20755 VM_Version::supports_avx512dq()) ||
20756 VM_Version::supports_avx512vldq());
20757 match(Set dst (MulVL src1 src2));
20758 ins_cost(500);
20759 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20760 ins_encode %{
20761 assert(UseAVX > 2, "required");
20762 int vlen_enc = vector_length_encoding(this);
20763 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20764 %}
20765 ins_pipe( pipe_slow );
20766 %}
20767
20768 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20769 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20770 VM_Version::supports_avx512dq()) ||
20771 (Matcher::vector_length_in_bytes(n) > 8 &&
20772 VM_Version::supports_avx512vldq()));
20773 match(Set dst (MulVL src (LoadVector mem)));
20774 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20775 ins_cost(500);
20776 ins_encode %{
20777 assert(UseAVX > 2, "required");
20778 int vlen_enc = vector_length_encoding(this);
20779 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20780 %}
20781 ins_pipe( pipe_slow );
20782 %}
20783
20784 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20785 predicate(UseAVX == 0);
20786 match(Set dst (MulVL src1 src2));
20787 ins_cost(500);
20788 effect(TEMP dst, TEMP xtmp);
20789 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20790 ins_encode %{
20791 assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are needed
20793 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20794 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20795 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20796 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20797 __ psllq($dst$$XMMRegister, 32);
20798 // Get the lo-lo products
20799 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20800 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20801 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20802 %}
20803 ins_pipe( pipe_slow );
20804 %}
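
// The sequence above implements the standard decomposition (with a_hi/a_lo
// denoting the upper/lower 32-bit halves of a 64-bit lane):
//   a * b mod 2^64 == ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo
// pshufd(0xB1) swaps the 32-bit halves of each lane so one pmulld forms
// both cross products, the pshufd/paddd pair sums them, psllq(32) places
// the sum in the upper half, and pmuludq contributes the unsigned
// a_lo*b_lo term.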
20805
20806 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20807 predicate(UseAVX > 0 &&
20808 ((Matcher::vector_length_in_bytes(n) == 64 &&
20809 !VM_Version::supports_avx512dq()) ||
20810 (Matcher::vector_length_in_bytes(n) < 64 &&
20811 !VM_Version::supports_avx512vldq())));
20812 match(Set dst (MulVL src1 src2));
20813 effect(TEMP xtmp1, TEMP xtmp2);
20814 ins_cost(500);
20815 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20816 ins_encode %{
20817 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are needed
20819 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20820 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20821 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20822 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20823 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20824 // Get the lo-lo products
20825 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20826 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20827 %}
20828 ins_pipe( pipe_slow );
20829 %}
20830
20831 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20832 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20833 match(Set dst (MulVL src1 src2));
20834 ins_cost(100);
20835 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20836 ins_encode %{
20837 int vlen_enc = vector_length_encoding(this);
20838 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20839 %}
20840 ins_pipe( pipe_slow );
20841 %}
20842
20843 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20844 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20845 match(Set dst (MulVL src1 src2));
20846 ins_cost(100);
20847 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20848 ins_encode %{
20849 int vlen_enc = vector_length_encoding(this);
20850 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20851 %}
20852 ins_pipe( pipe_slow );
20853 %}
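
// When both inputs fit in 32 bits, one vpmuludq (which multiplies the low
// 32 bits of each 64-bit lane, unsigned, into a full 64-bit product) or
// vpmuldq (signed) already produces the exact result, e.g.
// 0xFFFFFFFF * 2 == 0x1FFFFFFFE with no truncation; hence the low
// ins_cost(100) relative to the generic decomposition.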
20854
20855 // Floats vector mul
20856 instruct vmulF(vec dst, vec src) %{
20857 predicate(UseAVX == 0);
20858 match(Set dst (MulVF dst src));
20859 format %{ "mulps $dst,$src\t! mul packedF" %}
20860 ins_encode %{
20861 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20862 %}
20863 ins_pipe( pipe_slow );
20864 %}
20865
20866 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20867 predicate(UseAVX > 0);
20868 match(Set dst (MulVF src1 src2));
20869 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20870 ins_encode %{
20871 int vlen_enc = vector_length_encoding(this);
20872 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20873 %}
20874 ins_pipe( pipe_slow );
20875 %}
20876
20877 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20878 predicate((UseAVX > 0) &&
20879 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20880 match(Set dst (MulVF src (LoadVector mem)));
20881 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20882 ins_encode %{
20883 int vlen_enc = vector_length_encoding(this);
20884 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20885 %}
20886 ins_pipe( pipe_slow );
20887 %}
20888
20889 // Doubles vector mul
20890 instruct vmulD(vec dst, vec src) %{
20891 predicate(UseAVX == 0);
20892 match(Set dst (MulVD dst src));
20893 format %{ "mulpd $dst,$src\t! mul packedD" %}
20894 ins_encode %{
20895 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20896 %}
20897 ins_pipe( pipe_slow );
20898 %}
20899
20900 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20901 predicate(UseAVX > 0);
20902 match(Set dst (MulVD src1 src2));
20903 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20904 ins_encode %{
20905 int vlen_enc = vector_length_encoding(this);
20906 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20907 %}
20908 ins_pipe( pipe_slow );
20909 %}
20910
20911 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20912 predicate((UseAVX > 0) &&
20913 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20914 match(Set dst (MulVD src (LoadVector mem)));
20915 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20916 ins_encode %{
20917 int vlen_enc = vector_length_encoding(this);
20918 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20919 %}
20920 ins_pipe( pipe_slow );
20921 %}
20922
20923 // --------------------------------- DIV --------------------------------------
20924
20925 // Floats vector div
20926 instruct vdivF(vec dst, vec src) %{
20927 predicate(UseAVX == 0);
20928 match(Set dst (DivVF dst src));
20929 format %{ "divps $dst,$src\t! div packedF" %}
20930 ins_encode %{
20931 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20932 %}
20933 ins_pipe( pipe_slow );
20934 %}
20935
20936 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20937 predicate(UseAVX > 0);
20938 match(Set dst (DivVF src1 src2));
20939 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20940 ins_encode %{
20941 int vlen_enc = vector_length_encoding(this);
20942 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20943 %}
20944 ins_pipe( pipe_slow );
20945 %}
20946
20947 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20948 predicate((UseAVX > 0) &&
20949 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20950 match(Set dst (DivVF src (LoadVector mem)));
20951 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20952 ins_encode %{
20953 int vlen_enc = vector_length_encoding(this);
20954 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20955 %}
20956 ins_pipe( pipe_slow );
20957 %}
20958
20959 // Doubles vector div
20960 instruct vdivD(vec dst, vec src) %{
20961 predicate(UseAVX == 0);
20962 match(Set dst (DivVD dst src));
20963 format %{ "divpd $dst,$src\t! div packedD" %}
20964 ins_encode %{
20965 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20966 %}
20967 ins_pipe( pipe_slow );
20968 %}
20969
20970 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20971 predicate(UseAVX > 0);
20972 match(Set dst (DivVD src1 src2));
20973 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20974 ins_encode %{
20975 int vlen_enc = vector_length_encoding(this);
20976 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20977 %}
20978 ins_pipe( pipe_slow );
20979 %}
20980
20981 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20982 predicate((UseAVX > 0) &&
20983 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20984 match(Set dst (DivVD src (LoadVector mem)));
20985 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20986 ins_encode %{
20987 int vlen_enc = vector_length_encoding(this);
20988 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20989 %}
20990 ins_pipe( pipe_slow );
20991 %}
20992
20993 // ------------------------------ MinMax ---------------------------------------
20994
20995 // Byte, Short, Int vector Min/Max
20996 instruct minmax_reg_sse(vec dst, vec src) %{
20997 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20998 UseAVX == 0);
20999 match(Set dst (MinV dst src));
21000 match(Set dst (MaxV dst src));
21001 format %{ "vector_minmax $dst,$src\t! " %}
21002 ins_encode %{
21003 assert(UseSSE >= 4, "required");
21004
21005 int opcode = this->ideal_Opcode();
21006 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21007 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21008 %}
21009 ins_pipe( pipe_slow );
21010 %}
21011
21012 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21013 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21014 UseAVX > 0);
21015 match(Set dst (MinV src1 src2));
21016 match(Set dst (MaxV src1 src2));
21017 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
21018 ins_encode %{
21019 int opcode = this->ideal_Opcode();
21020 int vlen_enc = vector_length_encoding(this);
21021 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21022
21023 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21024 %}
21025 ins_pipe( pipe_slow );
21026 %}
21027
21028 // Long vector Min/Max
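// The SSE path pins its temporary to xmm0 (rxmm0): the pre-AVX variable
// blends (blendvpd/pblendvb), which pminmax is expected to fall back on for
// 64-bit lanes, take their mask operand implicitly in xmm0.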
21029 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21030 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21031 UseAVX == 0);
21032 match(Set dst (MinV dst src));
21033 match(Set dst (MaxV src dst));
21034 effect(TEMP dst, TEMP tmp);
21035 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
21036 ins_encode %{
21037 assert(UseSSE >= 4, "required");
21038
21039 int opcode = this->ideal_Opcode();
21040 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21041 assert(elem_bt == T_LONG, "sanity");
21042
21043 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21044 %}
21045 ins_pipe( pipe_slow );
21046 %}
21047
21048 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21049 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21050 UseAVX > 0 && !VM_Version::supports_avx512vl());
21051 match(Set dst (MinV src1 src2));
21052 match(Set dst (MaxV src1 src2));
21053 effect(TEMP dst);
21054 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
21055 ins_encode %{
21056 int vlen_enc = vector_length_encoding(this);
21057 int opcode = this->ideal_Opcode();
21058 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21059 assert(elem_bt == T_LONG, "sanity");
21060
21061 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21062 %}
21063 ins_pipe( pipe_slow );
21064 %}
21065
21066 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21067 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21068 Matcher::vector_element_basic_type(n) == T_LONG);
21069 match(Set dst (MinV src1 src2));
21070 match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
21072 ins_encode %{
21073 assert(UseAVX > 2, "required");
21074
21075 int vlen_enc = vector_length_encoding(this);
21076 int opcode = this->ideal_Opcode();
21077 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21078 assert(elem_bt == T_LONG, "sanity");
21079
21080 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21081 %}
21082 ins_pipe( pipe_slow );
21083 %}
21084
21085 // Float/Double vector Min/Max
21086 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21087 predicate(VM_Version::supports_avx10_2() &&
21088 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21089 match(Set dst (MinV a b));
21090 match(Set dst (MaxV a b));
21091 format %{ "vector_minmaxFP $dst, $a, $b" %}
21092 ins_encode %{
21093 int vlen_enc = vector_length_encoding(this);
21094 int opcode = this->ideal_Opcode();
21095 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21096 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21097 %}
21098 ins_pipe( pipe_slow );
21099 %}
21100
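// The legacy paths below need several temporaries because Java's
// Math.min/max semantics (NaN propagation, and -0.0 ordered below +0.0)
// do not map onto a single minps/maxps; they are assembled from
// compare-and-blend sequences. AVX10.2 provides a direct minmax
// instruction with the required semantics, hence the temp-free pattern
// above.
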
21101 // Float/Double vector Min/Max
21102 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21103 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21104 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21105 UseAVX > 0);
21106 match(Set dst (MinV a b));
21107 match(Set dst (MaxV a b));
21108 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21109 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21110 ins_encode %{
21111 assert(UseAVX > 0, "required");
21112
21113 int opcode = this->ideal_Opcode();
21114 int vlen_enc = vector_length_encoding(this);
21115 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21116
21117 __ vminmax_fp(opcode, elem_bt,
21118 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21120 %}
21121 ins_pipe( pipe_slow );
21122 %}
21123
21124 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21125 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21126 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21127 match(Set dst (MinV a b));
21128 match(Set dst (MaxV a b));
21129 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21130 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21131 ins_encode %{
21132 assert(UseAVX > 2, "required");
21133
21134 int opcode = this->ideal_Opcode();
21135 int vlen_enc = vector_length_encoding(this);
21136 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21137
21138 __ evminmax_fp(opcode, elem_bt,
21139 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21141 %}
21142 ins_pipe( pipe_slow );
21143 %}
21144
21145 // ------------------------------ Unsigned vector Min/Max ----------------------
21146
21147 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21148 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21149 match(Set dst (UMinV a b));
21150 match(Set dst (UMaxV a b));
21151 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21152 ins_encode %{
21153 int opcode = this->ideal_Opcode();
21154 int vlen_enc = vector_length_encoding(this);
21155 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21156 assert(is_integral_type(elem_bt), "");
21157 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21158 %}
21159 ins_pipe( pipe_slow );
21160 %}
21161
21162 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21163 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21164 match(Set dst (UMinV a (LoadVector b)));
21165 match(Set dst (UMaxV a (LoadVector b)));
21166 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21167 ins_encode %{
21168 int opcode = this->ideal_Opcode();
21169 int vlen_enc = vector_length_encoding(this);
21170 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21171 assert(is_integral_type(elem_bt), "");
21172 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21173 %}
21174 ins_pipe( pipe_slow );
21175 %}
21176
21177 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21178 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21179 match(Set dst (UMinV a b));
21180 match(Set dst (UMaxV a b));
21181 effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21183 ins_encode %{
21184 int opcode = this->ideal_Opcode();
21185 int vlen_enc = vector_length_encoding(this);
21186 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21187 %}
21188 ins_pipe( pipe_slow );
21189 %}
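
// Without AVX512VL there is no unsigned 64-bit vector compare at these
// lengths; vpuminmaxq is presumed to use the classic emulation with the two
// TEMPs: flip the sign bit of both operands (xor with 0x8000000000000000),
// compare with the signed vpcmpgtq, and blend accordingly.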
21190
21191 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21192 match(Set dst (UMinV (Binary dst src2) mask));
21193 match(Set dst (UMaxV (Binary dst src2) mask));
21194 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21195 ins_encode %{
21196 int vlen_enc = vector_length_encoding(this);
21197 BasicType bt = Matcher::vector_element_basic_type(this);
21198 int opc = this->ideal_Opcode();
21199 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21200 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21201 %}
21202 ins_pipe( pipe_slow );
21203 %}
21204
21205 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21206 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21207 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21208 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21209 ins_encode %{
21210 int vlen_enc = vector_length_encoding(this);
21211 BasicType bt = Matcher::vector_element_basic_type(this);
21212 int opc = this->ideal_Opcode();
21213 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21214 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21215 %}
21216 ins_pipe( pipe_slow );
21217 %}
21218
21219 // --------------------------------- Signum/CopySign ---------------------------
21220
21221 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21222 match(Set dst (SignumF dst (Binary zero one)));
21223 effect(KILL cr);
21224 format %{ "signumF $dst, $dst" %}
21225 ins_encode %{
21226 int opcode = this->ideal_Opcode();
21227 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21228 %}
21229 ins_pipe( pipe_slow );
21230 %}
21231
21232 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21233 match(Set dst (SignumD dst (Binary zero one)));
21234 effect(KILL cr);
21235 format %{ "signumD $dst, $dst" %}
21236 ins_encode %{
21237 int opcode = this->ideal_Opcode();
21238 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21239 %}
21240 ins_pipe( pipe_slow );
21241 %}
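
// signum_fp implements Java Math.signum: -1.0 or +1.0 for negative or
// positive inputs, while zero (preserving its sign) and NaN pass through
// unchanged; the $zero and $one operands supply the constants so the stub
// needs no constant-table load.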
21242
21243 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21244 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21245 match(Set dst (SignumVF src (Binary zero one)));
21246 match(Set dst (SignumVD src (Binary zero one)));
21247 effect(TEMP dst, TEMP xtmp1);
21248 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21249 ins_encode %{
21250 int opcode = this->ideal_Opcode();
21251 int vec_enc = vector_length_encoding(this);
21252 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21253 $xtmp1$$XMMRegister, vec_enc);
21254 %}
21255 ins_pipe( pipe_slow );
21256 %}
21257
21258 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21259 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21260 match(Set dst (SignumVF src (Binary zero one)));
21261 match(Set dst (SignumVD src (Binary zero one)));
21262 effect(TEMP dst, TEMP ktmp1);
21263 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21264 ins_encode %{
21265 int opcode = this->ideal_Opcode();
21266 int vec_enc = vector_length_encoding(this);
21267 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21268 $ktmp1$$KRegister, vec_enc);
21269 %}
21270 ins_pipe( pipe_slow );
21271 %}
21272
21273 // ---------------------------------------
21274 // For copySign use 0xE4 as writemask for vpternlog
21275 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21276 // C (xmm2) is set to 0x7FFFFFFF
21277 // Wherever xmm2 is 0, we want to pick from B (sign)
21278 // Wherever xmm2 is 1, we want to pick from A (src)
21279 //
21280 // A B C Result
21281 // 0 0 0 0
21282 // 0 0 1 0
21283 // 0 1 0 1
21284 // 0 1 1 0
21285 // 1 0 0 0
21286 // 1 0 1 1
21287 // 1 1 0 1
21288 // 1 1 1 1
21289 //
// Result, read from the (A,B,C) = (1,1,1) row down to (0,0,0), is 0b11100100 = 0xE4
21291 // ---------------------------------------
21292
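// A scalar sketch of the same selection in Java, assuming the IEEE-754 float
// layout (bit 31 = sign, bits 30..0 = magnitude); this is what the 0xE4
// ternary select computes per 32-bit lane:
//
//   int r = (Float.floatToRawIntBits(mag)  & 0x7FFFFFFF)   // A where C = 1
//         | (Float.floatToRawIntBits(sign) & 0x80000000);  // B where C = 0
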
21293 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21294 match(Set dst (CopySignF dst src));
21295 effect(TEMP tmp1, TEMP tmp2);
21296 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21297 ins_encode %{
21298 __ movl($tmp2$$Register, 0x7FFFFFFF);
21299 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21300 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21301 %}
21302 ins_pipe( pipe_slow );
21303 %}
21304
21305 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21306 match(Set dst (CopySignD dst (Binary src zero)));
21307 ins_cost(100);
21308 effect(TEMP tmp1, TEMP tmp2);
21309 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21310 ins_encode %{
21311 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21312 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21313 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21314 %}
21315 ins_pipe( pipe_slow );
21316 %}
21317
21318 //----------------------------- CompressBits/ExpandBits ------------------------
21319
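// pext/pdep (BMI2) semantics, as a worked scalar example: pext gathers the
// src bits selected by mask into the low-order bits of the result, while
// pdep scatters the low-order src bits out to the positions selected by
// mask. (BMI2 availability is assumed to be checked elsewhere, e.g. in
// Matcher::match_rule_supported.)
//
//   pext(src=0b10110010, mask=0b11001100) == 0b00001000  // src bits 2,3,6,7
//   pdep(src=0b00001011, mask=0b11001100) == 0b10001100  // into bits 2,3,6,7
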
21320 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21321 predicate(n->bottom_type()->isa_int());
21322 match(Set dst (CompressBits src mask));
21323 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21324 ins_encode %{
21325 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21326 %}
21327 ins_pipe( pipe_slow );
21328 %}
21329
21330 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21331 predicate(n->bottom_type()->isa_int());
21332 match(Set dst (ExpandBits src mask));
21333 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21334 ins_encode %{
21335 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21336 %}
21337 ins_pipe( pipe_slow );
21338 %}
21339
21340 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21341 predicate(n->bottom_type()->isa_int());
21342 match(Set dst (CompressBits src (LoadI mask)));
21343 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21344 ins_encode %{
21345 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21346 %}
21347 ins_pipe( pipe_slow );
21348 %}
21349
21350 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21351 predicate(n->bottom_type()->isa_int());
21352 match(Set dst (ExpandBits src (LoadI mask)));
21353 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21354 ins_encode %{
21355 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21356 %}
21357 ins_pipe( pipe_slow );
21358 %}
21359
21360 // --------------------------------- Sqrt --------------------------------------
21361
21362 instruct vsqrtF_reg(vec dst, vec src) %{
21363 match(Set dst (SqrtVF src));
21364 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21365 ins_encode %{
21366 assert(UseAVX > 0, "required");
21367 int vlen_enc = vector_length_encoding(this);
21368 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21369 %}
21370 ins_pipe( pipe_slow );
21371 %}
21372
21373 instruct vsqrtF_mem(vec dst, memory mem) %{
21374 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21375 match(Set dst (SqrtVF (LoadVector mem)));
21376 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21377 ins_encode %{
21378 assert(UseAVX > 0, "required");
21379 int vlen_enc = vector_length_encoding(this);
21380 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21381 %}
21382 ins_pipe( pipe_slow );
21383 %}
21384
21385 // Floating point vector sqrt
21386 instruct vsqrtD_reg(vec dst, vec src) %{
21387 match(Set dst (SqrtVD src));
21388 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21389 ins_encode %{
21390 assert(UseAVX > 0, "required");
21391 int vlen_enc = vector_length_encoding(this);
21392 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21393 %}
21394 ins_pipe( pipe_slow );
21395 %}
21396
21397 instruct vsqrtD_mem(vec dst, memory mem) %{
21398 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21399 match(Set dst (SqrtVD (LoadVector mem)));
21400 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21401 ins_encode %{
21402 assert(UseAVX > 0, "required");
21403 int vlen_enc = vector_length_encoding(this);
21404 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21405 %}
21406 ins_pipe( pipe_slow );
21407 %}
21408
21409 // ------------------------------ Shift ---------------------------------------
21410
21411 // Left and right shift count vectors are the same on x86
21412 // (only lowest bits of xmm reg are used for count).
21413 instruct vshiftcnt(vec dst, rRegI cnt) %{
21414 match(Set dst (LShiftCntV cnt));
21415 match(Set dst (RShiftCntV cnt));
21416 format %{ "movdl $dst,$cnt\t! load shift count" %}
21417 ins_encode %{
21418 __ movdl($dst$$XMMRegister, $cnt$$Register);
21419 %}
21420 ins_pipe( pipe_slow );
21421 %}
21422
21423 // Byte vector shift
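// x86 has no byte-granular shift instructions, so the rules below widen the
// bytes to words, shift as words, mask back into byte range, and re-pack.
// A per-lane scalar sketch, where shiftOp stands for the matched shift kind
// and s for the count:
//
//   int w = sign ? (int)b : (b & 0xFF);  // vextendbw: widen byte to word
//   w = shiftOp(w, s);                   // vshiftw on the widened lane
//   byte r = (byte)(w & 0xFF);           // short-to-byte mask + packuswb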
21424 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21425 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21426 match(Set dst ( LShiftVB src shift));
21427 match(Set dst ( RShiftVB src shift));
21428 match(Set dst (URShiftVB src shift));
21429 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21430 format %{"vector_byte_shift $dst,$src,$shift" %}
21431 ins_encode %{
21432 assert(UseSSE > 3, "required");
21433 int opcode = this->ideal_Opcode();
21434 bool sign = (opcode != Op_URShiftVB);
21435 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21436 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21437 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21438 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21439 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21440 %}
21441 ins_pipe( pipe_slow );
21442 %}
21443
21444 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21445 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21446 UseAVX <= 1);
21447 match(Set dst ( LShiftVB src shift));
21448 match(Set dst ( RShiftVB src shift));
21449 match(Set dst (URShiftVB src shift));
21450 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21451 format %{"vector_byte_shift $dst,$src,$shift" %}
21452 ins_encode %{
21453 assert(UseSSE > 3, "required");
21454 int opcode = this->ideal_Opcode();
21455 bool sign = (opcode != Op_URShiftVB);
21456 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21457 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21458 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21459 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21460 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21461 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21462 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21463 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21464 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21465 %}
21466 ins_pipe( pipe_slow );
21467 %}
21468
21469 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21470 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21471 UseAVX > 1);
21472 match(Set dst ( LShiftVB src shift));
21473 match(Set dst ( RShiftVB src shift));
21474 match(Set dst (URShiftVB src shift));
21475 effect(TEMP dst, TEMP tmp);
21476 format %{"vector_byte_shift $dst,$src,$shift" %}
21477 ins_encode %{
21478 int opcode = this->ideal_Opcode();
21479 bool sign = (opcode != Op_URShiftVB);
21480 int vlen_enc = Assembler::AVX_256bit;
21481 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21482 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21483 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21484 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21485 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21486 %}
21487 ins_pipe( pipe_slow );
21488 %}
21489
21490 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21491 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21492 match(Set dst ( LShiftVB src shift));
21493 match(Set dst ( RShiftVB src shift));
21494 match(Set dst (URShiftVB src shift));
21495 effect(TEMP dst, TEMP tmp);
21496 format %{"vector_byte_shift $dst,$src,$shift" %}
21497 ins_encode %{
21498 assert(UseAVX > 1, "required");
21499 int opcode = this->ideal_Opcode();
21500 bool sign = (opcode != Op_URShiftVB);
21501 int vlen_enc = Assembler::AVX_256bit;
21502 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21503 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21504 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21505 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21506 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21507 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21508 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21509 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21510 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21511 %}
21512 ins_pipe( pipe_slow );
21513 %}
21514
21515 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21516 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21517 match(Set dst ( LShiftVB src shift));
21518 match(Set dst (RShiftVB src shift));
21519 match(Set dst (URShiftVB src shift));
21520 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21521 format %{"vector_byte_shift $dst,$src,$shift" %}
21522 ins_encode %{
21523 assert(UseAVX > 2, "required");
21524 int opcode = this->ideal_Opcode();
21525 bool sign = (opcode != Op_URShiftVB);
21526 int vlen_enc = Assembler::AVX_512bit;
21527 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21528 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21529 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21530 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21531 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21532 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21533 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21534 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21535 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21536 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21537 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21538 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21539 %}
21540 ins_pipe( pipe_slow );
21541 %}
21542
// A vector logical right shift of shorts would produce incorrect Java results
// for negative data, because Java code converts a short value to int with
// sign extension before shifting. Char vectors are fine, since chars are
// unsigned values.
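// For example, in Java:
//
//   short s = -1;            // 0xFFFF
//   (short)(s >>> 2) == -1   // int-widened: 0xFFFFFFFF >>> 2, truncated back
//
// whereas a 16-bit psrlw of 0xFFFF by 2 yields 0x3FFF (16383), which differs.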
// Shorts/Chars vector shift
21548 instruct vshiftS(vec dst, vec src, vec shift) %{
21549 predicate(!n->as_ShiftV()->is_var_shift());
21550 match(Set dst ( LShiftVS src shift));
21551 match(Set dst ( RShiftVS src shift));
21552 match(Set dst (URShiftVS src shift));
21553 effect(TEMP dst, USE src, USE shift);
21554 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21555 ins_encode %{
21556 int opcode = this->ideal_Opcode();
21557 if (UseAVX > 0) {
21558 int vlen_enc = vector_length_encoding(this);
21559 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21560 } else {
21561 int vlen = Matcher::vector_length(this);
21562 if (vlen == 2) {
21563 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21564 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21565 } else if (vlen == 4) {
21566 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21567 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21568 } else {
21569 assert (vlen == 8, "sanity");
21570 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21571 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21572 }
21573 }
21574 %}
21575 ins_pipe( pipe_slow );
21576 %}
21577
21578 // Integers vector left shift
21579 instruct vshiftI(vec dst, vec src, vec shift) %{
21580 predicate(!n->as_ShiftV()->is_var_shift());
21581 match(Set dst ( LShiftVI src shift));
21582 match(Set dst ( RShiftVI src shift));
21583 match(Set dst (URShiftVI src shift));
21584 effect(TEMP dst, USE src, USE shift);
21585 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21586 ins_encode %{
21587 int opcode = this->ideal_Opcode();
21588 if (UseAVX > 0) {
21589 int vlen_enc = vector_length_encoding(this);
21590 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21591 } else {
21592 int vlen = Matcher::vector_length(this);
21593 if (vlen == 2) {
21594 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21595 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21596 } else {
21597 assert(vlen == 4, "sanity");
21598 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21599 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21600 }
21601 }
21602 %}
21603 ins_pipe( pipe_slow );
21604 %}
21605
21606 // Integers vector left constant shift
21607 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21608 match(Set dst (LShiftVI src (LShiftCntV shift)));
21609 match(Set dst (RShiftVI src (RShiftCntV shift)));
21610 match(Set dst (URShiftVI src (RShiftCntV shift)));
21611 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21612 ins_encode %{
21613 int opcode = this->ideal_Opcode();
21614 if (UseAVX > 0) {
21615 int vector_len = vector_length_encoding(this);
21616 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21617 } else {
21618 int vlen = Matcher::vector_length(this);
21619 if (vlen == 2) {
21620 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21621 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21622 } else {
21623 assert(vlen == 4, "sanity");
21624 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21625 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21626 }
21627 }
21628 %}
21629 ins_pipe( pipe_slow );
21630 %}
21631
21632 // Longs vector shift
21633 instruct vshiftL(vec dst, vec src, vec shift) %{
21634 predicate(!n->as_ShiftV()->is_var_shift());
21635 match(Set dst ( LShiftVL src shift));
21636 match(Set dst (URShiftVL src shift));
21637 effect(TEMP dst, USE src, USE shift);
21638 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21639 ins_encode %{
21640 int opcode = this->ideal_Opcode();
21641 if (UseAVX > 0) {
21642 int vlen_enc = vector_length_encoding(this);
21643 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21644 } else {
21645 assert(Matcher::vector_length(this) == 2, "");
21646 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21647 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21648 }
21649 %}
21650 ins_pipe( pipe_slow );
21651 %}
21652
21653 // Longs vector constant shift
21654 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21655 match(Set dst (LShiftVL src (LShiftCntV shift)));
21656 match(Set dst (URShiftVL src (RShiftCntV shift)));
21657 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21658 ins_encode %{
21659 int opcode = this->ideal_Opcode();
21660 if (UseAVX > 0) {
21661 int vector_len = vector_length_encoding(this);
21662 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21663 } else {
21664 assert(Matcher::vector_length(this) == 2, "");
21665 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21666 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21667 }
21668 %}
21669 ins_pipe( pipe_slow );
21670 %}
21671
21672 // -------------------ArithmeticRightShift -----------------------------------
21673 // Long vector arithmetic right shift
21674 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21675 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21676 match(Set dst (RShiftVL src shift));
21677 effect(TEMP dst, TEMP tmp);
21678 format %{ "vshiftq $dst,$src,$shift" %}
21679 ins_encode %{
21680 uint vlen = Matcher::vector_length(this);
21681 if (vlen == 2) {
21682 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21683 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21684 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21685 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21686 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21687 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21688 } else {
21689 assert(vlen == 4, "sanity");
21690 assert(UseAVX > 1, "required");
21691 int vlen_enc = Assembler::AVX_256bit;
21692 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21693 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21694 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21695 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21696 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21697 }
21698 %}
21699 ins_pipe( pipe_slow );
21700 %}
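
// The rule above emulates a 64-bit arithmetic right shift (there is no psraq
// before AVX-512) via the standard logical-shift identity; in Java terms:
//
//   long m = Long.MIN_VALUE >>> n;  // the shifted sign mask, as in tmp
//   long r = ((x >>> n) ^ m) - m;   // == x >> n; xor/sub re-extend the sign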
21701
21702 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21703 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21704 match(Set dst (RShiftVL src shift));
21705 format %{ "vshiftq $dst,$src,$shift" %}
21706 ins_encode %{
21707 int vlen_enc = vector_length_encoding(this);
21708 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21709 %}
21710 ins_pipe( pipe_slow );
21711 %}
21712
21713 // ------------------- Variable Shift -----------------------------
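// Variable shifts take a per-lane count. AVX2 provides them only for dwords
// and qwords (vpsllvd/vpsrlvd/vpsravd, vpsllvq/vpsrlvq); AVX-512BW adds the
// word forms (vpsllvw/vpsrlvw/vpsravw). Bytes always, and shorts without
// AVX-512BW, are therefore widened, shifted at a wider granularity, and
// narrowed back, much like the constant-shift byte rules above.
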
21714 // Byte variable shift
21715 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21716 predicate(Matcher::vector_length(n) <= 8 &&
21717 n->as_ShiftV()->is_var_shift() &&
21718 !VM_Version::supports_avx512bw());
21719 match(Set dst ( LShiftVB src shift));
21720 match(Set dst ( RShiftVB src shift));
21721 match(Set dst (URShiftVB src shift));
21722 effect(TEMP dst, TEMP vtmp);
21723 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21724 ins_encode %{
21725 assert(UseAVX >= 2, "required");
21726
21727 int opcode = this->ideal_Opcode();
21728 int vlen_enc = Assembler::AVX_128bit;
21729 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21730 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21731 %}
21732 ins_pipe( pipe_slow );
21733 %}
21734
21735 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21736 predicate(Matcher::vector_length(n) == 16 &&
21737 n->as_ShiftV()->is_var_shift() &&
21738 !VM_Version::supports_avx512bw());
21739 match(Set dst ( LShiftVB src shift));
21740 match(Set dst ( RShiftVB src shift));
21741 match(Set dst (URShiftVB src shift));
21742 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21743 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21744 ins_encode %{
21745 assert(UseAVX >= 2, "required");
21746
21747 int opcode = this->ideal_Opcode();
21748 int vlen_enc = Assembler::AVX_128bit;
21749 // Shift lower half and get word result in dst
21750 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21751
21752 // Shift upper half and get word result in vtmp1
21753 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21754 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21755 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21756
21757 // Merge and down convert the two word results to byte in dst
21758 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21759 %}
21760 ins_pipe( pipe_slow );
21761 %}
21762
21763 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21764 predicate(Matcher::vector_length(n) == 32 &&
21765 n->as_ShiftV()->is_var_shift() &&
21766 !VM_Version::supports_avx512bw());
21767 match(Set dst ( LShiftVB src shift));
21768 match(Set dst ( RShiftVB src shift));
21769 match(Set dst (URShiftVB src shift));
21770 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21772 ins_encode %{
21773 assert(UseAVX >= 2, "required");
21774
21775 int opcode = this->ideal_Opcode();
21776 int vlen_enc = Assembler::AVX_128bit;
21777 // Process lower 128 bits and get result in dst
21778 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21779 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21780 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21781 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21782 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21783
21784 // Process higher 128 bits and get result in vtmp3
21785 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21786 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21787 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21788 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21789 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21790 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21791 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21792
21793 // Merge the two results in dst
21794 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21795 %}
21796 ins_pipe( pipe_slow );
21797 %}
21798
21799 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21800 predicate(Matcher::vector_length(n) <= 32 &&
21801 n->as_ShiftV()->is_var_shift() &&
21802 VM_Version::supports_avx512bw());
21803 match(Set dst ( LShiftVB src shift));
21804 match(Set dst ( RShiftVB src shift));
21805 match(Set dst (URShiftVB src shift));
21806 effect(TEMP dst, TEMP vtmp);
21807 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21808 ins_encode %{
21809 assert(UseAVX > 2, "required");
21810
21811 int opcode = this->ideal_Opcode();
21812 int vlen_enc = vector_length_encoding(this);
21813 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21814 %}
21815 ins_pipe( pipe_slow );
21816 %}
21817
21818 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21819 predicate(Matcher::vector_length(n) == 64 &&
21820 n->as_ShiftV()->is_var_shift() &&
21821 VM_Version::supports_avx512bw());
21822 match(Set dst ( LShiftVB src shift));
21823 match(Set dst ( RShiftVB src shift));
21824 match(Set dst (URShiftVB src shift));
21825 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21826 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21827 ins_encode %{
21828 assert(UseAVX > 2, "required");
21829
21830 int opcode = this->ideal_Opcode();
21831 int vlen_enc = Assembler::AVX_256bit;
21832 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21833 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21834 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21835 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21836 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21837 %}
21838 ins_pipe( pipe_slow );
21839 %}
21840
21841 // Short variable shift
21842 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21843 predicate(Matcher::vector_length(n) <= 8 &&
21844 n->as_ShiftV()->is_var_shift() &&
21845 !VM_Version::supports_avx512bw());
21846 match(Set dst ( LShiftVS src shift));
21847 match(Set dst ( RShiftVS src shift));
21848 match(Set dst (URShiftVS src shift));
21849 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
21851 ins_encode %{
21852 assert(UseAVX >= 2, "required");
21853
21854 int opcode = this->ideal_Opcode();
21855 bool sign = (opcode != Op_URShiftVS);
21856 int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21859 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21860 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21861 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21862 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21863 %}
21864 ins_pipe( pipe_slow );
21865 %}
21866
21867 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21868 predicate(Matcher::vector_length(n) == 16 &&
21869 n->as_ShiftV()->is_var_shift() &&
21870 !VM_Version::supports_avx512bw());
21871 match(Set dst ( LShiftVS src shift));
21872 match(Set dst ( RShiftVS src shift));
21873 match(Set dst (URShiftVS src shift));
21874 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
21876 ins_encode %{
21877 assert(UseAVX >= 2, "required");
21878
21879 int opcode = this->ideal_Opcode();
21880 bool sign = (opcode != Op_URShiftVS);
21881 int vlen_enc = Assembler::AVX_256bit;
21882 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21883 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21884 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21885 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21886 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21887
21888 // Shift upper half, with result in dst using vtmp1 as TEMP
21889 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21890 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21891 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21892 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21893 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21894 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21895
21896 // Merge lower and upper half result into dst
21897 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21898 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21899 %}
21900 ins_pipe( pipe_slow );
21901 %}
21902
21903 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21904 predicate(n->as_ShiftV()->is_var_shift() &&
21905 VM_Version::supports_avx512bw());
21906 match(Set dst ( LShiftVS src shift));
21907 match(Set dst ( RShiftVS src shift));
21908 match(Set dst (URShiftVS src shift));
21909 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21910 ins_encode %{
21911 assert(UseAVX > 2, "required");
21912
21913 int opcode = this->ideal_Opcode();
21914 int vlen_enc = vector_length_encoding(this);
21915 if (!VM_Version::supports_avx512vl()) {
21916 vlen_enc = Assembler::AVX_512bit;
21917 }
21918 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21919 %}
21920 ins_pipe( pipe_slow );
21921 %}
21922
// Integer variable shift
21924 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21925 predicate(n->as_ShiftV()->is_var_shift());
21926 match(Set dst ( LShiftVI src shift));
21927 match(Set dst ( RShiftVI src shift));
21928 match(Set dst (URShiftVI src shift));
21929 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21930 ins_encode %{
21931 assert(UseAVX >= 2, "required");
21932
21933 int opcode = this->ideal_Opcode();
21934 int vlen_enc = vector_length_encoding(this);
21935 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21936 %}
21937 ins_pipe( pipe_slow );
21938 %}
21939
// Long variable shift
21941 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21942 predicate(n->as_ShiftV()->is_var_shift());
21943 match(Set dst ( LShiftVL src shift));
21944 match(Set dst (URShiftVL src shift));
21945 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21946 ins_encode %{
21947 assert(UseAVX >= 2, "required");
21948
21949 int opcode = this->ideal_Opcode();
21950 int vlen_enc = vector_length_encoding(this);
21951 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21952 %}
21953 ins_pipe( pipe_slow );
21954 %}
21955
// Long variable arithmetic right shift
21957 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21958 predicate(Matcher::vector_length(n) <= 4 &&
21959 n->as_ShiftV()->is_var_shift() &&
21960 UseAVX == 2);
21961 match(Set dst (RShiftVL src shift));
21962 effect(TEMP dst, TEMP vtmp);
21963 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21964 ins_encode %{
21965 int opcode = this->ideal_Opcode();
21966 int vlen_enc = vector_length_encoding(this);
21967 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21968 $vtmp$$XMMRegister);
21969 %}
21970 ins_pipe( pipe_slow );
21971 %}
21972
21973 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21974 predicate(n->as_ShiftV()->is_var_shift() &&
21975 UseAVX > 2);
21976 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21978 ins_encode %{
21979 int opcode = this->ideal_Opcode();
21980 int vlen_enc = vector_length_encoding(this);
21981 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21982 %}
21983 ins_pipe( pipe_slow );
21984 %}
21985
21986 // --------------------------------- AND --------------------------------------
21987
21988 instruct vand(vec dst, vec src) %{
21989 predicate(UseAVX == 0);
21990 match(Set dst (AndV dst src));
21991 format %{ "pand $dst,$src\t! and vectors" %}
21992 ins_encode %{
21993 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21994 %}
21995 ins_pipe( pipe_slow );
21996 %}
21997
21998 instruct vand_reg(vec dst, vec src1, vec src2) %{
21999 predicate(UseAVX > 0);
22000 match(Set dst (AndV src1 src2));
22001 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
22002 ins_encode %{
22003 int vlen_enc = vector_length_encoding(this);
22004 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22005 %}
22006 ins_pipe( pipe_slow );
22007 %}
22008
22009 instruct vand_mem(vec dst, vec src, memory mem) %{
22010 predicate((UseAVX > 0) &&
22011 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22012 match(Set dst (AndV src (LoadVector mem)));
22013 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
22014 ins_encode %{
22015 int vlen_enc = vector_length_encoding(this);
22016 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22017 %}
22018 ins_pipe( pipe_slow );
22019 %}
22020
22021 // --------------------------------- OR ---------------------------------------
22022
22023 instruct vor(vec dst, vec src) %{
22024 predicate(UseAVX == 0);
22025 match(Set dst (OrV dst src));
22026 format %{ "por $dst,$src\t! or vectors" %}
22027 ins_encode %{
22028 __ por($dst$$XMMRegister, $src$$XMMRegister);
22029 %}
22030 ins_pipe( pipe_slow );
22031 %}
22032
22033 instruct vor_reg(vec dst, vec src1, vec src2) %{
22034 predicate(UseAVX > 0);
22035 match(Set dst (OrV src1 src2));
22036 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
22037 ins_encode %{
22038 int vlen_enc = vector_length_encoding(this);
22039 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22040 %}
22041 ins_pipe( pipe_slow );
22042 %}
22043
22044 instruct vor_mem(vec dst, vec src, memory mem) %{
22045 predicate((UseAVX > 0) &&
22046 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22047 match(Set dst (OrV src (LoadVector mem)));
22048 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
22049 ins_encode %{
22050 int vlen_enc = vector_length_encoding(this);
22051 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22052 %}
22053 ins_pipe( pipe_slow );
22054 %}
22055
22056 // --------------------------------- XOR --------------------------------------
22057
22058 instruct vxor(vec dst, vec src) %{
22059 predicate(UseAVX == 0);
22060 match(Set dst (XorV dst src));
22061 format %{ "pxor $dst,$src\t! xor vectors" %}
22062 ins_encode %{
22063 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22064 %}
22065 ins_pipe( pipe_slow );
22066 %}
22067
22068 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22069 predicate(UseAVX > 0);
22070 match(Set dst (XorV src1 src2));
22071 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
22072 ins_encode %{
22073 int vlen_enc = vector_length_encoding(this);
22074 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22075 %}
22076 ins_pipe( pipe_slow );
22077 %}
22078
22079 instruct vxor_mem(vec dst, vec src, memory mem) %{
22080 predicate((UseAVX > 0) &&
22081 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22082 match(Set dst (XorV src (LoadVector mem)));
22083 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
22084 ins_encode %{
22085 int vlen_enc = vector_length_encoding(this);
22086 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22087 %}
22088 ins_pipe( pipe_slow );
22089 %}
22090
22091 // --------------------------------- VectorCast --------------------------------------
22092
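// Vector casts convert lanes between element types. Widening integral casts
// sign-extend (vpmovsx*); narrowing casts use the EVEX down-converts
// (evpmov*), which without AVX512VL exist only at the 512-bit encoding,
// hence the vlen_enc overrides below. A scalar sketch of a narrowing cast:
//
//   short s = 0x1234;
//   byte b = (byte)s;  // truncates to the low 8 bits: 0x34
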
22093 instruct vcastBtoX(vec dst, vec src) %{
22094 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22095 match(Set dst (VectorCastB2X src));
22096 format %{ "vector_cast_b2x $dst,$src\t!" %}
22097 ins_encode %{
22098 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22099 int vlen_enc = vector_length_encoding(this);
22100 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22101 %}
22102 ins_pipe( pipe_slow );
22103 %}
22104
22105 instruct vcastBtoD(legVec dst, legVec src) %{
22106 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22107 match(Set dst (VectorCastB2X src));
22108 format %{ "vector_cast_b2x $dst,$src\t!" %}
22109 ins_encode %{
22110 int vlen_enc = vector_length_encoding(this);
22111 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22112 %}
22113 ins_pipe( pipe_slow );
22114 %}
22115
22116 instruct castStoX(vec dst, vec src) %{
22117 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22118 Matcher::vector_length(n->in(1)) <= 8 && // src
22119 Matcher::vector_element_basic_type(n) == T_BYTE);
22120 match(Set dst (VectorCastS2X src));
22121 format %{ "vector_cast_s2x $dst,$src" %}
22122 ins_encode %{
22123 assert(UseAVX > 0, "required");
22124
22125 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22126 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22127 %}
22128 ins_pipe( pipe_slow );
22129 %}
22130
22131 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22132 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22133 Matcher::vector_length(n->in(1)) == 16 && // src
22134 Matcher::vector_element_basic_type(n) == T_BYTE);
22135 effect(TEMP dst, TEMP vtmp);
22136 match(Set dst (VectorCastS2X src));
22137 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22138 ins_encode %{
22139 assert(UseAVX > 0, "required");
22140
22141 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22142 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22143 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22144 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22145 %}
22146 ins_pipe( pipe_slow );
22147 %}
22148
22149 instruct vcastStoX_evex(vec dst, vec src) %{
22150 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22151 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22152 match(Set dst (VectorCastS2X src));
22153 format %{ "vector_cast_s2x $dst,$src\t!" %}
22154 ins_encode %{
22155 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22156 int src_vlen_enc = vector_length_encoding(this, $src);
22157 int vlen_enc = vector_length_encoding(this);
22158 switch (to_elem_bt) {
22159 case T_BYTE:
22160 if (!VM_Version::supports_avx512vl()) {
22161 vlen_enc = Assembler::AVX_512bit;
22162 }
22163 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22164 break;
22165 case T_INT:
22166 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22167 break;
22168 case T_FLOAT:
22169 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22170 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22171 break;
22172 case T_LONG:
22173 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22174 break;
22175 case T_DOUBLE: {
22176 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22177 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22178 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22179 break;
22180 }
22181 default:
22182 ShouldNotReachHere();
22183 }
22184 %}
22185 ins_pipe( pipe_slow );
22186 %}
22187
22188 instruct castItoX(vec dst, vec src) %{
22189 predicate(UseAVX <= 2 &&
22190 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22191 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22192 match(Set dst (VectorCastI2X src));
22193 format %{ "vector_cast_i2x $dst,$src" %}
22194 ins_encode %{
22195 assert(UseAVX > 0, "required");
22196
22197 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22198 int vlen_enc = vector_length_encoding(this, $src);
22199
22200 if (to_elem_bt == T_BYTE) {
22201 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22202 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22203 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22204 } else {
22205 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22206 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22207 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22208 }
22209 %}
22210 ins_pipe( pipe_slow );
22211 %}
22212
22213 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22214 predicate(UseAVX <= 2 &&
22215 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22216 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22217 match(Set dst (VectorCastI2X src));
22218 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22219 effect(TEMP dst, TEMP vtmp);
22220 ins_encode %{
22221 assert(UseAVX > 0, "required");
22222
22223 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22224 int vlen_enc = vector_length_encoding(this, $src);
22225
22226 if (to_elem_bt == T_BYTE) {
22227 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22228 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22229 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22230 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22231 } else {
22232 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22233 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22234 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22235 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22236 }
22237 %}
22238 ins_pipe( pipe_slow );
22239 %}
22240
22241 instruct vcastItoX_evex(vec dst, vec src) %{
22242 predicate(UseAVX > 2 ||
22243 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22244 match(Set dst (VectorCastI2X src));
22245 format %{ "vector_cast_i2x $dst,$src\t!" %}
22246 ins_encode %{
22247 assert(UseAVX > 0, "required");
22248
22249 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22250 int src_vlen_enc = vector_length_encoding(this, $src);
22251 int dst_vlen_enc = vector_length_encoding(this);
22252 switch (dst_elem_bt) {
22253 case T_BYTE:
22254 if (!VM_Version::supports_avx512vl()) {
22255 src_vlen_enc = Assembler::AVX_512bit;
22256 }
22257 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22258 break;
22259 case T_SHORT:
22260 if (!VM_Version::supports_avx512vl()) {
22261 src_vlen_enc = Assembler::AVX_512bit;
22262 }
22263 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22264 break;
22265 case T_FLOAT:
22266 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22267 break;
22268 case T_LONG:
22269 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22270 break;
22271 case T_DOUBLE:
22272 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22273 break;
22274 default:
22275 ShouldNotReachHere();
22276 }
22277 %}
22278 ins_pipe( pipe_slow );
22279 %}
22280
22281 instruct vcastLtoBS(vec dst, vec src) %{
22282 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22283 UseAVX <= 2);
22284 match(Set dst (VectorCastL2X src));
22285 format %{ "vector_cast_l2x $dst,$src" %}
22286 ins_encode %{
22287 assert(UseAVX > 0, "required");
22288
22289 int vlen = Matcher::vector_length_in_bytes(this, $src);
22290 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22291 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22292 : ExternalAddress(vector_int_to_short_mask());
22293 if (vlen <= 16) {
22294 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22295 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22296 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22297 } else {
22298 assert(vlen <= 32, "required");
22299 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22300 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22301 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22302 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22303 }
22304 if (to_elem_bt == T_BYTE) {
22305 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22306 }
22307 %}
22308 ins_pipe( pipe_slow );
22309 %}
22310
22311 instruct vcastLtoX_evex(vec dst, vec src) %{
22312 predicate(UseAVX > 2 ||
22313 (Matcher::vector_element_basic_type(n) == T_INT ||
22314 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22315 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22316 match(Set dst (VectorCastL2X src));
22317 format %{ "vector_cast_l2x $dst,$src\t!" %}
22318 ins_encode %{
22319 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22320 int vlen = Matcher::vector_length_in_bytes(this, $src);
22321 int vlen_enc = vector_length_encoding(this, $src);
22322 switch (to_elem_bt) {
22323 case T_BYTE:
22324 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22325 vlen_enc = Assembler::AVX_512bit;
22326 }
22327 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22328 break;
22329 case T_SHORT:
22330 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22331 vlen_enc = Assembler::AVX_512bit;
22332 }
22333 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22334 break;
22335 case T_INT:
22336 if (vlen == 8) {
22337 if ($dst$$XMMRegister != $src$$XMMRegister) {
22338 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22339 }
22340 } else if (vlen == 16) {
22341 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22342 } else if (vlen == 32) {
22343 if (UseAVX > 2) {
22344 if (!VM_Version::supports_avx512vl()) {
22345 vlen_enc = Assembler::AVX_512bit;
22346 }
22347 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22348 } else {
22349 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22350 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22351 }
22352 } else { // vlen == 64
22353 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22354 }
22355 break;
22356 case T_FLOAT:
22357 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22358 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22359 break;
22360 case T_DOUBLE:
22361 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22362 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22363 break;
22364
22365 default: assert(false, "%s", type2name(to_elem_bt));
22366 }
22367 %}
22368 ins_pipe( pipe_slow );
22369 %}
22370
22371 instruct vcastFtoD_reg(vec dst, vec src) %{
22372 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22373 match(Set dst (VectorCastF2X src));
22374 format %{ "vector_cast_f2d $dst,$src\t!" %}
22375 ins_encode %{
22376 int vlen_enc = vector_length_encoding(this);
22377 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22378 %}
22379 ins_pipe( pipe_slow );
22380 %}
22381
22383 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22384 predicate(!VM_Version::supports_avx10_2() &&
22385 !VM_Version::supports_avx512vl() &&
22386 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22387 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22388 is_integral_type(Matcher::vector_element_basic_type(n)));
22389 match(Set dst (VectorCastF2X src));
22390 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22391 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22392 ins_encode %{
22393 int vlen_enc = vector_length_encoding(this, $src);
22394 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load
    // addresses wider than 32 bits for the register-indirect addressing mode,
    // since stub constants are part of the code cache and ReservedCodeCacheSize
    // is currently capped at 2G. Targets are free to raise this limit, but a
    // code cache larger than 2G looks unreasonable in practice; on the upside,
    // with the given cap we save a temporary register allocation, which in the
    // limiting case can prevent spilling in blocks with high register pressure.
22402 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22403 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22404 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22405 %}
22406 ins_pipe( pipe_slow );
22407 %}
22408
22409 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22410 predicate(!VM_Version::supports_avx10_2() &&
22411 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22412 is_integral_type(Matcher::vector_element_basic_type(n)));
22413 match(Set dst (VectorCastF2X src));
22414 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22415 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22416 ins_encode %{
22417 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22418 if (to_elem_bt == T_LONG) {
22419 int vlen_enc = vector_length_encoding(this);
22420 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22421 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22422 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22423 } else {
22424 int vlen_enc = vector_length_encoding(this, $src);
22425 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22426 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22427 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22428 }
22429 %}
22430 ins_pipe( pipe_slow );
22431 %}
22432
22433 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22434 predicate(VM_Version::supports_avx10_2() &&
22435 is_integral_type(Matcher::vector_element_basic_type(n)));
22436 match(Set dst (VectorCastF2X src));
22437 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22438 ins_encode %{
22439 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22440 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22441 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22442 %}
22443 ins_pipe( pipe_slow );
22444 %}
22445
22446 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22447 predicate(VM_Version::supports_avx10_2() &&
22448 is_integral_type(Matcher::vector_element_basic_type(n)));
22449 match(Set dst (VectorCastF2X (LoadVector src)));
22450 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22451 ins_encode %{
22452 int vlen = Matcher::vector_length(this);
22453 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22454 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22455 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22456 %}
22457 ins_pipe( pipe_slow );
22458 %}
22459
22460 instruct vcastDtoF_reg(vec dst, vec src) %{
22461 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22462 match(Set dst (VectorCastD2X src));
22463 format %{ "vector_cast_d2x $dst,$src\t!" %}
22464 ins_encode %{
22465 int vlen_enc = vector_length_encoding(this, $src);
22466 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22467 %}
22468 ins_pipe( pipe_slow );
22469 %}
22470
22471 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22472 predicate(!VM_Version::supports_avx10_2() &&
22473 !VM_Version::supports_avx512vl() &&
22474 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22475 is_integral_type(Matcher::vector_element_basic_type(n)));
22476 match(Set dst (VectorCastD2X src));
22477 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22478 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22479 ins_encode %{
22480 int vlen_enc = vector_length_encoding(this, $src);
22481 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22482 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22483 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22484 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22485 %}
22486 ins_pipe( pipe_slow );
22487 %}
22488
22489 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22490 predicate(!VM_Version::supports_avx10_2() &&
22491 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22492 is_integral_type(Matcher::vector_element_basic_type(n)));
22493 match(Set dst (VectorCastD2X src));
22494 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22495 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22496 ins_encode %{
22497 int vlen_enc = vector_length_encoding(this, $src);
22498 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22499 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22500 ExternalAddress(vector_float_signflip());
22501 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22502 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22503 %}
22504 ins_pipe( pipe_slow );
22505 %}
22506
22507 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22508 predicate(VM_Version::supports_avx10_2() &&
22509 is_integral_type(Matcher::vector_element_basic_type(n)));
22510 match(Set dst (VectorCastD2X src));
22511 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22512 ins_encode %{
22513 int vlen_enc = vector_length_encoding(this, $src);
22514 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22515 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22516 %}
22517 ins_pipe( pipe_slow );
22518 %}
22519
22520 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22521 predicate(VM_Version::supports_avx10_2() &&
22522 is_integral_type(Matcher::vector_element_basic_type(n)));
22523 match(Set dst (VectorCastD2X (LoadVector src)));
22524 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22525 ins_encode %{
22526 int vlen = Matcher::vector_length(this);
22527 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22528 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22529 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22530 %}
22531 ins_pipe( pipe_slow );
22532 %}
22533
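// Unsigned (zero-extending) widening casts: e.g. byte 0xFF becomes int 255
// rather than -1. These presumably lower to the vpmovzx* forms, in contrast
// to the sign-extending vpmovsx* used by the signed casts above:
//
//   int i = b & 0xFF;  // scalar equivalent of a B2I unsigned cast
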
22534 instruct vucast(vec dst, vec src) %{
22535 match(Set dst (VectorUCastB2X src));
22536 match(Set dst (VectorUCastS2X src));
22537 match(Set dst (VectorUCastI2X src));
22538 format %{ "vector_ucast $dst,$src\t!" %}
22539 ins_encode %{
22540 assert(UseAVX > 0, "required");
22541
22542 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22543 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22544 int vlen_enc = vector_length_encoding(this);
22545 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22546 %}
22547 ins_pipe( pipe_slow );
22548 %}
22549
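// RoundVF/RoundVD implement Java's Math.round, i.e. effectively
// floor(x + 0.5) with saturation on overflow and 0 for NaN. The new_mxcsr
// constants below assume the standard MXCSR layout: 0x3F80 is the default
// 0x1F80 with the rounding-control field set to round-toward-negative-
// infinity (giving the floor), and 0x3FBF additionally pre-sets the six
// exception status bits (0x3F), apparently as an E-core optimization
// (EnableX86ECoreOpts).
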
22550 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22551 predicate(!VM_Version::supports_avx512vl() &&
22552 Matcher::vector_length_in_bytes(n) < 64 &&
22553 Matcher::vector_element_basic_type(n) == T_INT);
22554 match(Set dst (RoundVF src));
22555 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22556 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22557 ins_encode %{
22558 int vlen_enc = vector_length_encoding(this);
22559 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22560 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22561 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22562 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22563 %}
22564 ins_pipe( pipe_slow );
22565 %}
22566
22567 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22568 predicate((VM_Version::supports_avx512vl() ||
22569 Matcher::vector_length_in_bytes(n) == 64) &&
22570 Matcher::vector_element_basic_type(n) == T_INT);
22571 match(Set dst (RoundVF src));
22572 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22573 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22574 ins_encode %{
22575 int vlen_enc = vector_length_encoding(this);
22576 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22577 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22578 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22579 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22580 %}
22581 ins_pipe( pipe_slow );
22582 %}
22583
22584 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22585 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22586 match(Set dst (RoundVD src));
22587 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22588 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22589 ins_encode %{
22590 int vlen_enc = vector_length_encoding(this);
22591 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22592 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22593 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22594 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22595 %}
22596 ins_pipe( pipe_slow );
22597 %}
22598
22599 // --------------------------------- VectorMaskCmp --------------------------------------
22600
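// Lowering strategy: with AVX encodings a vector compare writes all-ones or
// all-zero lanes straight into the XMM/YMM destination. With EVEX the compare
// writes a k-register; when the ideal result is still a lane vector
// (isa_vectmask() == nullptr) the k-mask is expanded back into lanes via a
// masked move from the vector_all_bits_set() constant, otherwise the
// k-register itself is the result.
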
22601 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22602 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22603 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22604 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22605 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22606 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22607 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22608 ins_encode %{
22609 int vlen_enc = vector_length_encoding(this, $src1);
22610 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22611 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22612 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22613 } else {
22614 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22615 }
22616 %}
22617 ins_pipe( pipe_slow );
22618 %}
22619
22620 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22621 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22622 n->bottom_type()->isa_vectmask() == nullptr &&
22623 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22624 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22625 effect(TEMP ktmp);
22626 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22627 ins_encode %{
22628 int vlen_enc = Assembler::AVX_512bit;
22629 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22630 KRegister mask = k0; // The comparison itself is not being masked.
22631 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22632 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22633 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22634 } else {
22635 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22636 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22637 }
22638 %}
22639 ins_pipe( pipe_slow );
22640 %}
22641
22642 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22643 predicate(n->bottom_type()->isa_vectmask() &&
22644 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22645 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22646 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22647 ins_encode %{
22648 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22649 int vlen_enc = vector_length_encoding(this, $src1);
22650 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22651 KRegister mask = k0; // The comparison itself is not being masked.
22652 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22653 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22654 } else {
22655 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22656 }
22657 %}
22658 ins_pipe( pipe_slow );
22659 %}
22660
22661 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22662 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22663 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22664 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22665 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22666 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22667 (n->in(2)->get_int() == BoolTest::eq ||
22668 n->in(2)->get_int() == BoolTest::lt ||
22669 n->in(2)->get_int() == BoolTest::gt)); // cond
22670 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22671 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22672 ins_encode %{
22673 int vlen_enc = vector_length_encoding(this, $src1);
22674 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22675 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22676 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22677 %}
22678 ins_pipe( pipe_slow );
22679 %}
22680
22681 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22682 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22683 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22684 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22685 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22686 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22687 (n->in(2)->get_int() == BoolTest::ne ||
22688 n->in(2)->get_int() == BoolTest::le ||
22689 n->in(2)->get_int() == BoolTest::ge)); // cond
22690 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22691 effect(TEMP dst, TEMP xtmp);
22692 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22693 ins_encode %{
22694 int vlen_enc = vector_length_encoding(this, $src1);
22695 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22696 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22697 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22698 %}
22699 ins_pipe( pipe_slow );
22700 %}
22701
22702 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22703 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22704 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22705 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22706 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22707 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22708 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22709 effect(TEMP dst, TEMP xtmp);
22710 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22711 ins_encode %{
22712 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22713 int vlen_enc = vector_length_encoding(this, $src1);
22714 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22715 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22716
22717 if (vlen_enc == Assembler::AVX_128bit) {
22718 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22719 } else {
22720 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22721 }
22722 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22723 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22724 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22725 %}
22726 ins_pipe( pipe_slow );
22727 %}
22728
22729 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22730 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22731 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22732 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22733 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22734 effect(TEMP ktmp);
22735 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22736 ins_encode %{
22737 assert(UseAVX > 2, "required");
22738
22739 int vlen_enc = vector_length_encoding(this, $src1);
22740 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22741 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22742 KRegister mask = k0; // The comparison itself is not being masked.
22743 bool merge = false;
22744 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22745
22746 switch (src1_elem_bt) {
22747 case T_INT: {
22748 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22749 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22750 break;
22751 }
22752 case T_LONG: {
22753 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22754 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22755 break;
22756 }
22757 default: assert(false, "%s", type2name(src1_elem_bt));
22758 }
22759 %}
22760 ins_pipe( pipe_slow );
22761 %}
22762
22764 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22765 predicate(n->bottom_type()->isa_vectmask() &&
22766 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22767 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22768 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22769 ins_encode %{
22770 assert(UseAVX > 2, "required");
22771 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22772
22773 int vlen_enc = vector_length_encoding(this, $src1);
22774 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22775 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22776 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22777
    // Dispatch on the source element type.
22779 switch (src1_elem_bt) {
22780 case T_BYTE: {
22781 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22782 break;
22783 }
22784 case T_SHORT: {
22785 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22786 break;
22787 }
22788 case T_INT: {
22789 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22790 break;
22791 }
22792 case T_LONG: {
22793 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22794 break;
22795 }
22796 default: assert(false, "%s", type2name(src1_elem_bt));
22797 }
22798 %}
22799 ins_pipe( pipe_slow );
22800 %}
22801
22802 // Extract
22803
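// Scalar lane extraction. For vectors of at most 16 bytes the element is
// pulled directly from the source register with get_elem; for 32- and 64-byte
// vectors get_lane first copies the 128-bit lane containing the element into
// a temporary, then get_elem extracts from that lane.
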
22804 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22805 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22806 match(Set dst (ExtractI src idx));
22807 match(Set dst (ExtractS src idx));
22808 match(Set dst (ExtractB src idx));
22809 format %{ "extractI $dst,$src,$idx\t!" %}
22810 ins_encode %{
22811 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22812
22813 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22814 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22815 %}
22816 ins_pipe( pipe_slow );
22817 %}
22818
22819 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22820 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22821 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22822 match(Set dst (ExtractI src idx));
22823 match(Set dst (ExtractS src idx));
22824 match(Set dst (ExtractB src idx));
22825 effect(TEMP vtmp);
22826 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22827 ins_encode %{
22828 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22829
22830 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22831 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22832 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22833 %}
22834 ins_pipe( pipe_slow );
22835 %}
22836
22837 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22838 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22839 match(Set dst (ExtractL src idx));
22840 format %{ "extractL $dst,$src,$idx\t!" %}
22841 ins_encode %{
22842 assert(UseSSE >= 4, "required");
22843 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22844
22845 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22846 %}
22847 ins_pipe( pipe_slow );
22848 %}
22849
22850 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22851 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22852 Matcher::vector_length(n->in(1)) == 8); // src
22853 match(Set dst (ExtractL src idx));
22854 effect(TEMP vtmp);
22855 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22856 ins_encode %{
22857 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22858
22859 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22860 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22861 %}
22862 ins_pipe( pipe_slow );
22863 %}
22864
22865 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22866 predicate(Matcher::vector_length(n->in(1)) <= 4);
22867 match(Set dst (ExtractF src idx));
22868 effect(TEMP dst, TEMP vtmp);
22869 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22870 ins_encode %{
22871 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22872
22873 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22874 %}
22875 ins_pipe( pipe_slow );
22876 %}
22877
22878 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16);  // src
22881 match(Set dst (ExtractF src idx));
22882 effect(TEMP vtmp);
22883 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22884 ins_encode %{
22885 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22886
22887 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22888 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22889 %}
22890 ins_pipe( pipe_slow );
22891 %}
22892
22893 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22894 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22895 match(Set dst (ExtractD src idx));
22896 format %{ "extractD $dst,$src,$idx\t!" %}
22897 ins_encode %{
22898 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22899
22900 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22901 %}
22902 ins_pipe( pipe_slow );
22903 %}
22904
22905 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22906 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22907 Matcher::vector_length(n->in(1)) == 8); // src
22908 match(Set dst (ExtractD src idx));
22909 effect(TEMP vtmp);
22910 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22911 ins_encode %{
22912 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22913
22914 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22915 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22916 %}
22917 ins_pipe( pipe_slow );
22918 %}
22919
22920 // --------------------------------- Vector Blend --------------------------------------
22921
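// VectorBlend picks each lane from src1 or src2 under a lane mask. SSE4.1
// pblendvb reads its mask implicitly from xmm0, which is why blendvp pins an
// rxmm0 temp. The EnableX86ECoreOpts variant replaces the blend instruction
// with an andn/and/or sequence, and the 64-byte form first turns the vector
// mask into a k-register so evpblend can do a masked select.
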
22922 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22923 predicate(UseAVX == 0);
22924 match(Set dst (VectorBlend (Binary dst src) mask));
22925 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22926 effect(TEMP tmp);
22927 ins_encode %{
22928 assert(UseSSE >= 4, "required");
22929
22930 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22931 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22932 }
22933 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22934 %}
22935 ins_pipe( pipe_slow );
22936 %}
22937
22938 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22939 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22940 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22941 Matcher::vector_length_in_bytes(n) <= 32 &&
22942 is_integral_type(Matcher::vector_element_basic_type(n)));
22943 match(Set dst (VectorBlend (Binary src1 src2) mask));
22944 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22945 ins_encode %{
22946 int vlen_enc = vector_length_encoding(this);
22947 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22948 %}
22949 ins_pipe( pipe_slow );
22950 %}
22951
22952 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22953 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22954 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22955 Matcher::vector_length_in_bytes(n) <= 32 &&
22956 !is_integral_type(Matcher::vector_element_basic_type(n)));
22957 match(Set dst (VectorBlend (Binary src1 src2) mask));
22958 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22959 ins_encode %{
22960 int vlen_enc = vector_length_encoding(this);
22961 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22962 %}
22963 ins_pipe( pipe_slow );
22964 %}
22965
22966 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22967 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22968 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22969 Matcher::vector_length_in_bytes(n) <= 32);
22970 match(Set dst (VectorBlend (Binary src1 src2) mask));
22971 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22972 effect(TEMP vtmp, TEMP dst);
22973 ins_encode %{
22974 int vlen_enc = vector_length_encoding(this);
22975 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22976 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22977 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22978 %}
22979 ins_pipe( pipe_slow );
22980 %}
22981
22982 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22983 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22984 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22985 match(Set dst (VectorBlend (Binary src1 src2) mask));
22986 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22987 effect(TEMP ktmp);
22988 ins_encode %{
22989 int vlen_enc = Assembler::AVX_512bit;
22990 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22991 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22992 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22993 %}
22994 ins_pipe( pipe_slow );
22995 %}
22996
22998 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22999 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23000 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23001 VM_Version::supports_avx512bw()));
23002 match(Set dst (VectorBlend (Binary src1 src2) mask));
23003 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23004 ins_encode %{
23005 int vlen_enc = vector_length_encoding(this);
23006 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23007 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23008 %}
23009 ins_pipe( pipe_slow );
23010 %}
23011
23012 // --------------------------------- ABS --------------------------------------
23013 // a = |a|
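// 128-bit cases use the SSSE3 pabs* forms, wider vectors the AVX vpabs*
// forms. Long abs only exists as the AVX-512 evpabsq and is forced to the
// 512-bit encoding when AVX512VL is not available.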
23014 instruct vabsB_reg(vec dst, vec src) %{
23015 match(Set dst (AbsVB src));
23016 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23017 ins_encode %{
23018 uint vlen = Matcher::vector_length(this);
23019 if (vlen <= 16) {
23020 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23021 } else {
23022 int vlen_enc = vector_length_encoding(this);
23023 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23024 }
23025 %}
23026 ins_pipe( pipe_slow );
23027 %}
23028
23029 instruct vabsS_reg(vec dst, vec src) %{
23030 match(Set dst (AbsVS src));
23031 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23032 ins_encode %{
23033 uint vlen = Matcher::vector_length(this);
23034 if (vlen <= 8) {
23035 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23036 } else {
23037 int vlen_enc = vector_length_encoding(this);
23038 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23039 }
23040 %}
23041 ins_pipe( pipe_slow );
23042 %}
23043
23044 instruct vabsI_reg(vec dst, vec src) %{
23045 match(Set dst (AbsVI src));
23046 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23047 ins_encode %{
23048 uint vlen = Matcher::vector_length(this);
23049 if (vlen <= 4) {
23050 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23051 } else {
23052 int vlen_enc = vector_length_encoding(this);
23053 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23054 }
23055 %}
23056 ins_pipe( pipe_slow );
23057 %}
23058
23059 instruct vabsL_reg(vec dst, vec src) %{
23060 match(Set dst (AbsVL src));
23061 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23062 ins_encode %{
23063 assert(UseAVX > 2, "required");
23064 int vlen_enc = vector_length_encoding(this);
23065 if (!VM_Version::supports_avx512vl()) {
23066 vlen_enc = Assembler::AVX_512bit;
23067 }
23068 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23069 %}
23070 ins_pipe( pipe_slow );
23071 %}
23072
23073 // --------------------------------- ABSNEG --------------------------------------
23074
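// Float and double abs/neg share one expansion: vabsnegf/vabsnegd combine the
// input with a packed sign-bit constant (the "[mask]" shown in the formats),
// clearing the sign for AbsV* and flipping it for NegV*. The 4F case is
// matched as a destructive one-operand form, so dst and src coincide.
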
23075 instruct vabsnegF(vec dst, vec src) %{
23076 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23077 match(Set dst (AbsVF src));
23078 match(Set dst (NegVF src));
23079 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23080 ins_cost(150);
23081 ins_encode %{
23082 int opcode = this->ideal_Opcode();
23083 int vlen = Matcher::vector_length(this);
23084 if (vlen == 2) {
23085 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23086 } else {
23087 assert(vlen == 8 || vlen == 16, "required");
23088 int vlen_enc = vector_length_encoding(this);
23089 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23090 }
23091 %}
23092 ins_pipe( pipe_slow );
23093 %}
23094
23095 instruct vabsneg4F(vec dst) %{
23096 predicate(Matcher::vector_length(n) == 4);
23097 match(Set dst (AbsVF dst));
23098 match(Set dst (NegVF dst));
23099 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23100 ins_cost(150);
23101 ins_encode %{
23102 int opcode = this->ideal_Opcode();
23103 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23104 %}
23105 ins_pipe( pipe_slow );
23106 %}
23107
23108 instruct vabsnegD(vec dst, vec src) %{
23109 match(Set dst (AbsVD src));
23110 match(Set dst (NegVD src));
23111 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23112 ins_encode %{
23113 int opcode = this->ideal_Opcode();
23114 uint vlen = Matcher::vector_length(this);
23115 if (vlen == 2) {
23116 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23117 } else {
23118 int vlen_enc = vector_length_encoding(this);
23119 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23120 }
23121 %}
23122 ins_pipe( pipe_slow );
23123 %}
23124
23125 //------------------------------------- VectorTest --------------------------------------------
23126
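// VectorTest only produces condition flags. Vector inputs go through
// MacroAssembler::vectortest; k-register inputs with short masks are copied
// to a GPR, and-ed with (1 << masklen) - 1, and then either compared against
// that value (alltrue, BoolTest::overflow) or left to set ZF from the and
// (anytrue, BoolTest::ne). Wider masks use kortest directly.
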
23127 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23128 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23129 match(Set cr (VectorTest src1 src2));
23130 effect(TEMP vtmp);
23131 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23132 ins_encode %{
23133 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23134 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23135 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23136 %}
23137 ins_pipe( pipe_slow );
23138 %}
23139
23140 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23141 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23142 match(Set cr (VectorTest src1 src2));
23143 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23144 ins_encode %{
23145 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23146 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23147 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23148 %}
23149 ins_pipe( pipe_slow );
23150 %}
23151
23152 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23153 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23154 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23155 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23156 match(Set cr (VectorTest src1 src2));
23157 effect(TEMP tmp);
23158 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23159 ins_encode %{
23160 uint masklen = Matcher::vector_length(this, $src1);
23161 __ kmovwl($tmp$$Register, $src1$$KRegister);
23162 __ andl($tmp$$Register, (1 << masklen) - 1);
23163 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23164 %}
23165 ins_pipe( pipe_slow );
23166 %}
23167
23168 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23169 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23170 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23171 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23172 match(Set cr (VectorTest src1 src2));
23173 effect(TEMP tmp);
23174 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23175 ins_encode %{
23176 uint masklen = Matcher::vector_length(this, $src1);
23177 __ kmovwl($tmp$$Register, $src1$$KRegister);
23178 __ andl($tmp$$Register, (1 << masklen) - 1);
23179 %}
23180 ins_pipe( pipe_slow );
23181 %}
23182
23183 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23184 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23185 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23186 match(Set cr (VectorTest src1 src2));
23187 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23188 ins_encode %{
23189 uint masklen = Matcher::vector_length(this, $src1);
23190 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23191 %}
23192 ins_pipe( pipe_slow );
23193 %}
23194
23195 //------------------------------------- LoadMask --------------------------------------------
23196
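// VectorLoadMask converts a boolean byte vector into the internal mask
// layout: all-ones/all-zero lanes of the destination element size when the
// ideal type is a plain vector, or a k-register when it is a vectmask.
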
23197 instruct loadMask(legVec dst, legVec src) %{
23198 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23199 match(Set dst (VectorLoadMask src));
23200 effect(TEMP dst);
23201 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23202 ins_encode %{
23203 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23204 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23205 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23206 %}
23207 ins_pipe( pipe_slow );
23208 %}
23209
23210 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23211 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23212 match(Set dst (VectorLoadMask src));
23213 effect(TEMP xtmp);
23214 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23215 ins_encode %{
23216 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23217 true, Assembler::AVX_512bit);
23218 %}
23219 ins_pipe( pipe_slow );
23220 %}
23221
23222 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23223 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23224 match(Set dst (VectorLoadMask src));
23225 effect(TEMP xtmp);
23226 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23227 ins_encode %{
23228 int vlen_enc = vector_length_encoding(in(1));
23229 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23230 false, vlen_enc);
23231 %}
23232 ins_pipe( pipe_slow );
23233 %}
23234
23235 //------------------------------------- StoreMask --------------------------------------------
23236
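// VectorStoreMask is the inverse: narrow an element-sized lane mask to one
// byte per lane holding 0 or 1. The pack*/evpmov* chains narrow wider
// elements down to bytes, and the trailing pabsb maps all-ones (-1) lanes
// to 1.
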
23237 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23238 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23239 match(Set dst (VectorStoreMask src size));
23240 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23241 ins_encode %{
23242 int vlen = Matcher::vector_length(this);
23243 if (vlen <= 16 && UseAVX <= 2) {
23244 assert(UseSSE >= 3, "required");
23245 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23246 } else {
23247 assert(UseAVX > 0, "required");
23248 int src_vlen_enc = vector_length_encoding(this, $src);
23249 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23250 }
23251 %}
23252 ins_pipe( pipe_slow );
23253 %}
23254
23255 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23256 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23257 match(Set dst (VectorStoreMask src size));
23258 effect(TEMP_DEF dst, TEMP xtmp);
23259 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23260 ins_encode %{
23261 int vlen_enc = Assembler::AVX_128bit;
23262 int vlen = Matcher::vector_length(this);
23263 if (vlen <= 8) {
23264 assert(UseSSE >= 3, "required");
23265 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23266 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23267 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23268 } else {
23269 assert(UseAVX > 0, "required");
23270 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23271 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23272 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23273 }
23274 %}
23275 ins_pipe( pipe_slow );
23276 %}
23277
23278 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23279 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23280 match(Set dst (VectorStoreMask src size));
23281 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23282 effect(TEMP_DEF dst, TEMP xtmp);
23283 ins_encode %{
23284 int vlen_enc = Assembler::AVX_128bit;
23285 int vlen = Matcher::vector_length(this);
23286 if (vlen <= 4) {
23287 assert(UseSSE >= 3, "required");
23288 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23289 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23290 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23291 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23292 } else {
23293 assert(UseAVX > 0, "required");
23294 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23295 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23296 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23297 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23298 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23299 }
23300 %}
23301 ins_pipe( pipe_slow );
23302 %}
23303
23304 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23305 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23306 match(Set dst (VectorStoreMask src size));
23307 effect(TEMP_DEF dst, TEMP xtmp);
23308 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23309 ins_encode %{
23310 assert(UseSSE >= 3, "required");
23311 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23312 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23313 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23314 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23315 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23316 %}
23317 ins_pipe( pipe_slow );
23318 %}
23319
23320 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23321 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23322 match(Set dst (VectorStoreMask src size));
23323 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23324 effect(TEMP_DEF dst, TEMP vtmp);
23325 ins_encode %{
23326 int vlen_enc = Assembler::AVX_128bit;
23327 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23328 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23329 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23330 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23331 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23332 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23333 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23334 %}
23335 ins_pipe( pipe_slow );
23336 %}
23337
23338 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23339 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23340 match(Set dst (VectorStoreMask src size));
23341 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23342 ins_encode %{
23343 int src_vlen_enc = vector_length_encoding(this, $src);
23344 int dst_vlen_enc = vector_length_encoding(this);
23345 if (!VM_Version::supports_avx512vl()) {
23346 src_vlen_enc = Assembler::AVX_512bit;
23347 }
23348 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23349 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23350 %}
23351 ins_pipe( pipe_slow );
23352 %}
23353
23354 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23355 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23356 match(Set dst (VectorStoreMask src size));
23357 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23358 ins_encode %{
23359 int src_vlen_enc = vector_length_encoding(this, $src);
23360 int dst_vlen_enc = vector_length_encoding(this);
23361 if (!VM_Version::supports_avx512vl()) {
23362 src_vlen_enc = Assembler::AVX_512bit;
23363 }
23364 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23365 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23366 %}
23367 ins_pipe( pipe_slow );
23368 %}
23369
23370 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23371 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23372 match(Set dst (VectorStoreMask mask size));
23373 effect(TEMP_DEF dst);
23374 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23375 ins_encode %{
23376 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23377 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23378 false, Assembler::AVX_512bit, noreg);
23379 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23380 %}
23381 ins_pipe( pipe_slow );
23382 %}
23383
23384 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23385 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23386 match(Set dst (VectorStoreMask mask size));
23387 effect(TEMP_DEF dst);
23388 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23389 ins_encode %{
23390 int dst_vlen_enc = vector_length_encoding(this);
23391 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23392 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23393 %}
23394 ins_pipe( pipe_slow );
23395 %}
23396
23397 instruct vmaskcast_evex(kReg dst) %{
23398 match(Set dst (VectorMaskCast dst));
23399 ins_cost(0);
23400 format %{ "vector_mask_cast $dst" %}
23401 ins_encode %{
23402 // empty
23403 %}
23404 ins_pipe(empty);
23405 %}
23406
23407 instruct vmaskcast(vec dst) %{
23408 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23409 match(Set dst (VectorMaskCast dst));
23410 ins_cost(0);
23411 format %{ "vector_mask_cast $dst" %}
23412 ins_encode %{
23413 // empty
23414 %}
23415 ins_pipe(empty);
23416 %}
23417
23418 instruct vmaskcast_avx(vec dst, vec src) %{
23419 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23420 match(Set dst (VectorMaskCast src));
23421 format %{ "vector_mask_cast $dst, $src" %}
23422 ins_encode %{
23423 int vlen = Matcher::vector_length(this);
23424 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23425 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23426 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23427 %}
23428 ins_pipe(pipe_slow);
23429 %}
23430
23431 //-------------------------------- Load Iota Indices ----------------------------------
23432
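// VectorLoadConst with a zero input loads the iota constant {0, 1, 2, ...}
// for the element type; PopulateIndex below builds {start, start+1, ...} by
// broadcasting the start value and adding the iota indices.
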
23433 instruct loadIotaIndices(vec dst, immI_0 src) %{
23434 match(Set dst (VectorLoadConst src));
23435 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23436 ins_encode %{
23437 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23438 BasicType bt = Matcher::vector_element_basic_type(this);
23439 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23440 %}
23441 ins_pipe( pipe_slow );
23442 %}
23443
23444 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23445 match(Set dst (PopulateIndex src1 src2));
23446 effect(TEMP dst, TEMP vtmp);
23447 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23448 ins_encode %{
23449 assert($src2$$constant == 1, "required");
23450 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23451 int vlen_enc = vector_length_encoding(this);
23452 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23453 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23454 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23455 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23456 %}
23457 ins_pipe( pipe_slow );
23458 %}
23459
23460 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23461 match(Set dst (PopulateIndex src1 src2));
23462 effect(TEMP dst, TEMP vtmp);
23463 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23464 ins_encode %{
23465 assert($src2$$constant == 1, "required");
23466 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23467 int vlen_enc = vector_length_encoding(this);
23468 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23469 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23470 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23471 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23472 %}
23473 ins_pipe( pipe_slow );
23474 %}
23475
23476 //-------------------------------- Rearrange ----------------------------------
23477
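// VectorRearrange permutes the lanes of src by the indices in shuffle.
// pshufb/vpshufb only shuffle within 128-bit lanes, so the 256-bit byte path
// without AVX512_VBMI shuffles both the source and a lane-swapped copy, then
// blends the two results on a mask derived from the shuffle indices. With
// vpermb/vpermw/vpermd/vpermq a single cross-lane permute suffices.
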
23478 // LoadShuffle/Rearrange for Byte
23479 instruct rearrangeB(vec dst, vec shuffle) %{
23480 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23481 Matcher::vector_length(n) < 32);
23482 match(Set dst (VectorRearrange dst shuffle));
23483 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23484 ins_encode %{
23485 assert(UseSSE >= 4, "required");
23486 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23487 %}
23488 ins_pipe( pipe_slow );
23489 %}
23490
23491 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23492 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23493 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23494 match(Set dst (VectorRearrange src shuffle));
23495 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23496 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23497 ins_encode %{
23498 assert(UseAVX >= 2, "required");
23499 // Swap src into vtmp1
23500 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries coming from the other lane in the shuffle
23506 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23507 // Perform the blend
23508 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23509 %}
23510 ins_pipe( pipe_slow );
23511 %}
23512
23514 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23515 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23516 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23517 match(Set dst (VectorRearrange src shuffle));
23518 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23519 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23520 ins_encode %{
23521 int vlen_enc = vector_length_encoding(this);
23522 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23523 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23524 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23525 %}
23526 ins_pipe( pipe_slow );
23527 %}
23528
23529 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23530 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23531 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23532 match(Set dst (VectorRearrange src shuffle));
23533 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23534 ins_encode %{
23535 int vlen_enc = vector_length_encoding(this);
23536 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23537 %}
23538 ins_pipe( pipe_slow );
23539 %}
23540
23541 // LoadShuffle/Rearrange for Short
23542
23543 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23544 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23545 !VM_Version::supports_avx512bw());
23546 match(Set dst (VectorLoadShuffle src));
23547 effect(TEMP dst, TEMP vtmp);
23548 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23549 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms
23552 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23553 if (UseAVX == 0) {
23554 assert(vlen_in_bytes <= 16, "required");
23555 // Multiply each shuffle by two to get byte index
23556 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23557 __ psllw($vtmp$$XMMRegister, 1);
23558
23559 // Duplicate to create 2 copies of byte index
23560 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23561 __ psllw($dst$$XMMRegister, 8);
23562 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23563
23564 // Add one to get alternate byte index
23565 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23566 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23567 } else {
23568 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23569 int vlen_enc = vector_length_encoding(this);
23570 // Multiply each shuffle by two to get byte index
23571 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23572
23573 // Duplicate to create 2 copies of byte index
23574 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23575 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23576
23577 // Add one to get alternate byte index
23578 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23579 }
23580 %}
23581 ins_pipe( pipe_slow );
23582 %}
23583
23584 instruct rearrangeS(vec dst, vec shuffle) %{
23585 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23586 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23587 match(Set dst (VectorRearrange dst shuffle));
23588 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23589 ins_encode %{
23590 assert(UseSSE >= 4, "required");
23591 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23592 %}
23593 ins_pipe( pipe_slow );
23594 %}
23595
23596 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23597 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23598 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23599 match(Set dst (VectorRearrange src shuffle));
23600 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23601 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23602 ins_encode %{
23603 assert(UseAVX >= 2, "required");
23604 // Swap src into vtmp1
23605 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries coming from the other lane in the shuffle
23611 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23612 // Perform the blend
23613 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23614 %}
23615 ins_pipe( pipe_slow );
23616 %}
23617
23618 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23619 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23620 VM_Version::supports_avx512bw());
23621 match(Set dst (VectorRearrange src shuffle));
23622 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23623 ins_encode %{
23624 int vlen_enc = vector_length_encoding(this);
23625 if (!VM_Version::supports_avx512vl()) {
23626 vlen_enc = Assembler::AVX_512bit;
23627 }
23628 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23629 %}
23630 ins_pipe( pipe_slow );
23631 %}
23632
23633 // LoadShuffle/Rearrange for Integer and Float
23634
23635 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23636 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23637 Matcher::vector_length(n) == 4 && UseAVX == 0);
23638 match(Set dst (VectorLoadShuffle src));
23639 effect(TEMP dst, TEMP vtmp);
23640 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23641 ins_encode %{
23642 assert(UseSSE >= 4, "required");
23643
    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms
23646
23647 // Duplicate and multiply each shuffle by 4
23648 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23649 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23650 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23651 __ psllw($vtmp$$XMMRegister, 2);
23652
23653 // Duplicate again to create 4 copies of byte index
23654 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23655 __ psllw($dst$$XMMRegister, 8);
23656 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23657
23658 // Add 3,2,1,0 to get alternate byte index
23659 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23660 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23661 %}
23662 ins_pipe( pipe_slow );
23663 %}
23664
23665 instruct rearrangeI(vec dst, vec shuffle) %{
23666 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23667 UseAVX == 0);
23668 match(Set dst (VectorRearrange dst shuffle));
23669 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23670 ins_encode %{
23671 assert(UseSSE >= 4, "required");
23672 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23673 %}
23674 ins_pipe( pipe_slow );
23675 %}
23676
23677 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23678 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23679 UseAVX > 0);
23680 match(Set dst (VectorRearrange src shuffle));
23681 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23682 ins_encode %{
23683 int vlen_enc = vector_length_encoding(this);
23684 BasicType bt = Matcher::vector_element_basic_type(this);
23685 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23686 %}
23687 ins_pipe( pipe_slow );
23688 %}
23689
23690 // LoadShuffle/Rearrange for Long and Double
23691
23692 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23693 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23694 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23695 match(Set dst (VectorLoadShuffle src));
23696 effect(TEMP dst, TEMP vtmp);
23697 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23698 ins_encode %{
23699 assert(UseAVX >= 2, "required");
23700
23701 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask,
    // since only a double word shuffle instruction is available on these platforms
23704
23705 // Multiply each shuffle by two to get double word index
23706 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23707
23708 // Duplicate each double word shuffle
23709 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23710 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23711
23712 // Add one to get alternate double word index
23713 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23714 %}
23715 ins_pipe( pipe_slow );
23716 %}
23717
23718 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23719 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23720 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23721 match(Set dst (VectorRearrange src shuffle));
23722 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23723 ins_encode %{
23724 assert(UseAVX >= 2, "required");
23725
23726 int vlen_enc = vector_length_encoding(this);
23727 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23728 %}
23729 ins_pipe( pipe_slow );
23730 %}
23731
23732 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23733 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23734 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23735 match(Set dst (VectorRearrange src shuffle));
23736 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23737 ins_encode %{
23738 assert(UseAVX > 2, "required");
23739
23740 int vlen_enc = vector_length_encoding(this);
23741 if (vlen_enc == Assembler::AVX_128bit) {
23742 vlen_enc = Assembler::AVX_256bit;
23743 }
23744 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23745 %}
23746 ins_pipe( pipe_slow );
23747 %}
23748
23749 // --------------------------------- FMA --------------------------------------
23750 // a * b + c
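// Matched only when UseFMA is enabled. vfmaf/vfmad emit a fused multiply-add
// with c serving as both accumulator input and destination, so no temporary
// is needed; the memory forms fold the load of b into the instruction.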
23751
23752 instruct vfmaF_reg(vec a, vec b, vec c) %{
23753 match(Set c (FmaVF c (Binary a b)));
23754 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23755 ins_cost(150);
23756 ins_encode %{
23757 assert(UseFMA, "not enabled");
23758 int vlen_enc = vector_length_encoding(this);
23759 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23760 %}
23761 ins_pipe( pipe_slow );
23762 %}
23763
23764 instruct vfmaF_mem(vec a, memory b, vec c) %{
23765 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23766 match(Set c (FmaVF c (Binary a (LoadVector b))));
23767 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23768 ins_cost(150);
23769 ins_encode %{
23770 assert(UseFMA, "not enabled");
23771 int vlen_enc = vector_length_encoding(this);
23772 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23773 %}
23774 ins_pipe( pipe_slow );
23775 %}
23776
23777 instruct vfmaD_reg(vec a, vec b, vec c) %{
23778 match(Set c (FmaVD c (Binary a b)));
23779 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23780 ins_cost(150);
23781 ins_encode %{
23782 assert(UseFMA, "not enabled");
23783 int vlen_enc = vector_length_encoding(this);
23784 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23785 %}
23786 ins_pipe( pipe_slow );
23787 %}
23788
23789 instruct vfmaD_mem(vec a, memory b, vec c) %{
23790 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23791 match(Set c (FmaVD c (Binary a (LoadVector b))));
23792 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23793 ins_cost(150);
23794 ins_encode %{
23795 assert(UseFMA, "not enabled");
23796 int vlen_enc = vector_length_encoding(this);
23797 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23798 %}
23799 ins_pipe( pipe_slow );
23800 %}
23801
23802 // --------------------------------- Vector Multiply Add --------------------------------------
23803
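// MulAddVS2VI maps onto pmaddwd: multiply adjacent pairs of signed shorts and
// add each pair of products into a single int lane.
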
23804 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23805 predicate(UseAVX == 0);
23806 match(Set dst (MulAddVS2VI dst src1));
23807 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23808 ins_encode %{
23809 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23810 %}
23811 ins_pipe( pipe_slow );
23812 %}
23813
23814 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23815 predicate(UseAVX > 0);
23816 match(Set dst (MulAddVS2VI src1 src2));
23817 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23818 ins_encode %{
23819 int vlen_enc = vector_length_encoding(this);
23820 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23821 %}
23822 ins_pipe( pipe_slow );
23823 %}
23824
23825 // --------------------------------- Vector Multiply Add Add ----------------------------------
23826
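// With AVX512_VNNI the pmaddwd + add pair above fuses into a single
// evpdpwssd (multiply-accumulate into dst); the low ins_cost(10) steers the
// matcher toward this form whenever the predicate holds.
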
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_cost(10);
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- PopCount --------------------------------------
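// Per-lane population count: each destination lane receives the number of
// set bits in the corresponding source lane, i.e. the vector analogue of
//
//   dst[i] = __builtin_popcountll(src[i]);
//
// The EVEX rules use the direct vector popcount instructions where the
// predicate admits them; the AVX fallback computes the counts in the macro
// assembler with a table-lookup sequence, hence the extra temporaries.
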
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Trailing Zeros Count --------------------------------------
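// Per-lane count of trailing zero bits; a zero lane yields the lane width.
// A branch-free scalar identity that captures the semantics (sketch only):
//
//   ntz(x) = popcount(~x & (x - 1));   // == lane_bits when x == 0
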
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------
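// vpternlogd evaluates an arbitrary three-input boolean function in a single
// instruction: the $func immediate is an 8-entry truth table indexed by the
// corresponding bits of the three sources. For every result bit position:
//
//   idx    = (bit(dst) << 2) | (bit(src2) << 1) | bit(src3);
//   result = (func >> idx) & 1;
//
// e.g. func == 0x96 is a three-way XOR and func == 0xE8 is majority-of-three.
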
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Rotation Operations ----------------------------------
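// Per-lane rotate; for a lane of w bits and a shift 0 < s < w (sketch):
//
//   rol(x, s) = (x << s) | (x >>> (w - s));   // RotateLeftV
//   ror(x, s) = (x >>> s) | (x << (w - s));   // RotateRightV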
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Masked Operations ------------------------------------
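// Two masking regimes coexist below: the AVX rules take the mask as an
// ordinary vector (the high bit of each element selects the lane), while the
// EVEX rules take an opmask k-register. Either way a masked load behaves,
// per lane, like the sketch below; the EVEX rule passes merge == false, so
// masked-off lanes are zeroed rather than preserved:
//
//   dst[i] = mask[i] ? mem[i] : 0;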
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    __ mov64($dst$$Register, -1L);
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp);
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
    __ kmovql($dst$$KRegister, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
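// Both VectorMaskGen rules materialize a k-register with the low $len bits
// set, i.e. the lane predicate for a partial (tail) vector of $len elements:
//
//   k = ~0UL >> (64 - len);   // for len in (0, 64]; equals (1UL << len) - 1 when len < 64
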
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------
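// CompressV packs the lanes selected by the mask into consecutive low lanes
// of dst; ExpandV is the inverse, scattering consecutive source lanes out to
// the selected positions. Scalar sketch of compress (in the EVEX form below
// the unwritten tail of dst is zeroed):
//
//   int j = 0;
//   for (int i = 0; i < vlen; i++) {
//     if (mask[i]) dst[j++] = src[i];
//   }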
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "expected a vector mask");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
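// ReverseV reverses the bit order within every element; ReverseBytesV
// reverses the byte order (a per-element byte swap). With GFNI, reversing
// the bits of each byte is a single gf2p8affineqb against the constant
// matrix 0x8040201008040201 loaded below; wider element types then only
// need a byte shuffle to finish the job.
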
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------
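// Per-lane count of leading zero bits, counted from the most significant
// bit down; a zero lane yields the full lane width. For one int lane
// (scalar sketch):
//
//   dst[i] = (src[i] == 0) ? 32 : 31 - floor(log2((unsigned)src[i]));
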
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------
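// Predicated lane-wise operations. These rules funnel into evmasked_op();
// with merge == true (the common case below) masked-off lanes keep the
// previous value of dst instead of being zeroed, i.e. per lane:
//
//   dst[i] = mask[i] ? (dst[i] OP src2[i]) : dst[i];
//
// The rearrange rule further down passes merge == false and zeroes them.
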
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Dispatch the comparison on the element type of src1: integral types
    // select a signed or unsigned integer predicate, floating-point types
    // use the FP predicate encoding.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}

25020 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25021 predicate(Matcher::vector_length(n) <= 32);
25022 match(Set dst (MaskAll src));
25023 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
25024 ins_encode %{
25025 int mask_len = Matcher::vector_length(this);
25026 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25027 %}
25028 ins_pipe( pipe_slow );
25029 %}
25030
25031 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25032 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25033 match(Set dst (XorVMask src (MaskAll cnt)));
25034 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25035 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25036 ins_encode %{
25037 uint masklen = Matcher::vector_length(this);
25038 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25039 %}
25040 ins_pipe( pipe_slow );
25041 %}
25042
25043 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25044 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25045 (Matcher::vector_length(n) == 16) ||
25046 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25047 match(Set dst (XorVMask src (MaskAll cnt)));
25048 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25049 ins_encode %{
25050 uint masklen = Matcher::vector_length(this);
25051 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25052 %}
25053 ins_pipe( pipe_slow );
25054 %}
25055
25056 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25057 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25058 match(Set dst (VectorLongToMask src));
25059 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25060 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25061 ins_encode %{
25062 int mask_len = Matcher::vector_length(this);
25063 int vec_enc = vector_length_encoding(mask_len);
25064 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25065 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25066 %}
25067 ins_pipe( pipe_slow );
25068 %}
25069
25070
25071 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25072 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25073 match(Set dst (VectorLongToMask src));
25074 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
25075 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
25076 ins_encode %{
25077 int mask_len = Matcher::vector_length(this);
25078 assert(mask_len <= 32, "invalid mask length");
25079 int vec_enc = vector_length_encoding(mask_len);
25080 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25081 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25082 %}
25083 ins_pipe( pipe_slow );
25084 %}
25085
25086 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25087 predicate(n->bottom_type()->isa_vectmask());
25088 match(Set dst (VectorLongToMask src));
25089 format %{ "long_to_mask_evex $dst, $src\t!" %}
25090 ins_encode %{
25091 __ kmov($dst$$KRegister, $src$$Register);
25092 %}
25093 ins_pipe( pipe_slow );
25094 %}
25095
25096 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25097 match(Set dst (AndVMask src1 src2));
25098 match(Set dst (OrVMask src1 src2));
25099 match(Set dst (XorVMask src1 src2));
25100 effect(TEMP kscratch);
25101 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25102 ins_encode %{
25103 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25104 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25105 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25106 uint masklen = Matcher::vector_length(this);
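    // kandb/korb/kxorb require AVX512DQ; without it, widen sub-16-bit masks
    // and operate at 16-bit mask granularity instead.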
25107 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25108 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25109 %}
25110 ins_pipe( pipe_slow );
25111 %}
25112
25113 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25114 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25115 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25116 ins_encode %{
25117 int vlen_enc = vector_length_encoding(this);
25118 BasicType bt = Matcher::vector_element_basic_type(this);
25119 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25120 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25121 %}
25122 ins_pipe( pipe_slow );
25123 %}
25124
25125 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25126 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25127 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25128 ins_encode %{
25129 int vlen_enc = vector_length_encoding(this);
25130 BasicType bt = Matcher::vector_element_basic_type(this);
25131 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25132 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25133 %}
25134 ins_pipe( pipe_slow );
25135 %}
25136
25137 instruct castMM(kReg dst)
25138 %{
25139 match(Set dst (CastVV dst));
25140
25141 size(0);
25142 format %{ "# castVV of $dst" %}
25143 ins_encode(/* empty encoding */);
25144 ins_cost(0);
25145 ins_pipe(empty);
25146 %}
25147
25148 instruct castVV(vec dst)
25149 %{
25150 match(Set dst (CastVV dst));
25151
25152 size(0);
25153 format %{ "# castVV of $dst" %}
25154 ins_encode(/* empty encoding */);
25155 ins_cost(0);
25156 ins_pipe(empty);
25157 %}
25158
25159 instruct castVVLeg(legVec dst)
25160 %{
25161 match(Set dst (CastVV dst));
25162
25163 size(0);
25164 format %{ "# castVV of $dst" %}
25165 ins_encode(/* empty encoding */);
25166 ins_cost(0);
25167 ins_pipe(empty);
25168 %}
25169
25170 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25171 %{
25172 match(Set dst (IsInfiniteF src));
25173 effect(TEMP ktmp, KILL cr);
25174 format %{ "float_class_check $dst, $src" %}
25175 ins_encode %{
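    // imm8 0x18 selects the +Inf (bit 3) and -Inf (bit 4) input classes.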
25176 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25177 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25178 %}
25179 ins_pipe(pipe_slow);
25180 %}
25181
25182 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25183 %{
25184 match(Set dst (IsInfiniteD src));
25185 effect(TEMP ktmp, KILL cr);
25186 format %{ "double_class_check $dst, $src" %}
25187 ins_encode %{
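    // As above, imm8 0x18 tests for +/-Inf.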
25188 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25189 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25190 %}
25191 ins_pipe(pipe_slow);
25192 %}
25193
25194 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25195 %{
25196 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25197 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25198 match(Set dst (SaturatingAddV src1 src2));
25199 match(Set dst (SaturatingSubV src1 src2));
25200 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25201 ins_encode %{
25202 int vlen_enc = vector_length_encoding(this);
25203 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25204 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25205 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25206 %}
25207 ins_pipe(pipe_slow);
25208 %}
25209
25210 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25211 %{
25212 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25213 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25214 match(Set dst (SaturatingAddV src1 src2));
25215 match(Set dst (SaturatingSubV src1 src2));
25216 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25217 ins_encode %{
25218 int vlen_enc = vector_length_encoding(this);
25219 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25220 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25221 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25222 %}
25223 ins_pipe(pipe_slow);
25224 %}
25225
25226 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25227 %{
25228 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25229 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25230 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25231 match(Set dst (SaturatingAddV src1 src2));
25232 match(Set dst (SaturatingSubV src1 src2));
25233 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25234 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25235 ins_encode %{
25236 int vlen_enc = vector_length_encoding(this);
25237 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25238 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25239 $src1$$XMMRegister, $src2$$XMMRegister,
25240 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25241 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25242 %}
25243 ins_pipe(pipe_slow);
25244 %}
25245
25246 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25247 %{
25248 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25249 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25250 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25251 match(Set dst (SaturatingAddV src1 src2));
25252 match(Set dst (SaturatingSubV src1 src2));
25253 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25254 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25255 ins_encode %{
25256 int vlen_enc = vector_length_encoding(this);
25257 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25258 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25259 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25260 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25261 %}
25262 ins_pipe(pipe_slow);
25263 %}
25264
25265 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25266 %{
25267 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25268 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25269 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25270 match(Set dst (SaturatingAddV src1 src2));
25271 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25272 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25273 ins_encode %{
25274 int vlen_enc = vector_length_encoding(this);
25275 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25276 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25277 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25278 %}
25279 ins_pipe(pipe_slow);
25280 %}
25281
25282 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25283 %{
25284 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25285 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25286 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25287 match(Set dst (SaturatingAddV src1 src2));
25288 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25289 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25290 ins_encode %{
25291 int vlen_enc = vector_length_encoding(this);
25292 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25293 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25294 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25295 %}
25296 ins_pipe(pipe_slow);
25297 %}
25298
25299 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25300 %{
25301 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25302 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25303 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25304 match(Set dst (SaturatingSubV src1 src2));
25305 effect(TEMP ktmp);
25306 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25307 ins_encode %{
25308 int vlen_enc = vector_length_encoding(this);
25309 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25310 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25311 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25312 %}
25313 ins_pipe(pipe_slow);
25314 %}
25315
25316 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25317 %{
25318 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25319 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25320 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25321 match(Set dst (SaturatingSubV src1 src2));
25322 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25323 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25324 ins_encode %{
25325 int vlen_enc = vector_length_encoding(this);
25326 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25327 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25328 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25329 %}
25330 ins_pipe(pipe_slow);
25331 %}
25332
25333 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25334 %{
25335 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25336 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25337 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25338 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25339 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25340 ins_encode %{
25341 int vlen_enc = vector_length_encoding(this);
25342 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25343 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25344 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25345 %}
25346 ins_pipe(pipe_slow);
25347 %}
25348
25349 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25350 %{
25351 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25352 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25353 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25354 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25355 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25356 ins_encode %{
25357 int vlen_enc = vector_length_encoding(this);
25358 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25359 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25360 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25361 %}
25362 ins_pipe(pipe_slow);
25363 %}
25364
25365 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25366 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25367 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25368 match(Set dst (SaturatingAddV (Binary dst src) mask));
25369 match(Set dst (SaturatingSubV (Binary dst src) mask));
25370 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25371 ins_encode %{
25372 int vlen_enc = vector_length_encoding(this);
25373 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25374 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25375 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25376 %}
25377 ins_pipe( pipe_slow );
25378 %}
25379
25380 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25381 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25382 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25383 match(Set dst (SaturatingAddV (Binary dst src) mask));
25384 match(Set dst (SaturatingSubV (Binary dst src) mask));
25385 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25386 ins_encode %{
25387 int vlen_enc = vector_length_encoding(this);
25388 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25389 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25390 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25391 %}
25392 ins_pipe( pipe_slow );
25393 %}
25394
25395 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25396 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25397 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25398 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25399 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25400 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25401 ins_encode %{
25402 int vlen_enc = vector_length_encoding(this);
25403 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25404 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25405 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25406 %}
25407 ins_pipe( pipe_slow );
25408 %}
25409
25410 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25411 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25412 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25413 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25414 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25415 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25416 ins_encode %{
25417 int vlen_enc = vector_length_encoding(this);
25418 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25419 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25420 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25421 %}
25422 ins_pipe( pipe_slow );
25423 %}
25424
25425 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25426 %{
25427 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25428 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25429 ins_encode %{
25430 int vlen_enc = vector_length_encoding(this);
25431 BasicType bt = Matcher::vector_element_basic_type(this);
25432 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25433 %}
25434 ins_pipe(pipe_slow);
25435 %}
25436
25437 instruct reinterpretS2HF(regF dst, rRegI src)
25438 %{
25439 match(Set dst (ReinterpretS2HF src));
25440 format %{ "vmovw $dst, $src" %}
25441 ins_encode %{
25442 __ vmovw($dst$$XMMRegister, $src$$Register);
25443 %}
25444 ins_pipe(pipe_slow);
25445 %}
25446
25447 instruct reinterpretHF2S(rRegI dst, regF src)
25448 %{
25449 match(Set dst (ReinterpretHF2S src));
25450 format %{ "vmovw $dst, $src" %}
25451 ins_encode %{
25452 __ vmovw($dst$$Register, $src$$XMMRegister);
25453 %}
25454 ins_pipe(pipe_slow);
25455 %}
25456
25457 instruct convF2HFAndS2HF(regF dst, regF src)
25458 %{
25459 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25460 format %{ "convF2HFAndS2HF $dst, $src" %}
25461 ins_encode %{
25462 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25463 %}
25464 ins_pipe(pipe_slow);
25465 %}
25466
25467 instruct convHF2SAndHF2F(regF dst, regF src)
25468 %{
25469 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25470 format %{ "convHF2SAndHF2F $dst, $src" %}
25471 ins_encode %{
25472 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25473 %}
25474 ins_pipe(pipe_slow);
25475 %}
25476
25477 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25478 %{
25479 match(Set dst (SqrtHF src));
25480 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25481 ins_encode %{
25482 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25483 %}
25484 ins_pipe(pipe_slow);
25485 %}
25486
25487 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25488 %{
25489 match(Set dst (AddHF src1 src2));
25490 match(Set dst (DivHF src1 src2));
25491 match(Set dst (MulHF src1 src2));
25492 match(Set dst (SubHF src1 src2));
25493 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25494 ins_encode %{
25495 int opcode = this->ideal_Opcode();
25496 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25497 %}
25498 ins_pipe(pipe_slow);
25499 %}
25500
25501 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25502 %{
25503 predicate(VM_Version::supports_avx10_2());
25504 match(Set dst (MaxHF src1 src2));
25505 match(Set dst (MinHF src1 src2));
25506 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25507 ins_encode %{
25508 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25509 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25510 %}
25511 ins_pipe( pipe_slow );
25512 %}
25513
25514 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25515 %{
25516 predicate(!VM_Version::supports_avx10_2());
25517 match(Set dst (MaxHF src1 src2));
25518 match(Set dst (MinHF src1 src2));
25519 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25520 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25521 ins_encode %{
25522 int opcode = this->ideal_Opcode();
25523 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25524 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25525 %}
25526 ins_pipe( pipe_slow );
25527 %}
25528
25529 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25530 %{
25531 match(Set dst (FmaHF src2 (Binary dst src1)));
25532 effect(DEF dst);
25533 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25534 ins_encode %{
25535 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25536 %}
25537 ins_pipe( pipe_slow );
25538 %}
25539
25540
25541 instruct vector_sqrt_HF_reg(vec dst, vec src)
25542 %{
25543 match(Set dst (SqrtVHF src));
25544 format %{ "vector_sqrt_fp16 $dst, $src" %}
25545 ins_encode %{
25546 int vlen_enc = vector_length_encoding(this);
25547 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25548 %}
25549 ins_pipe(pipe_slow);
25550 %}
25551
25552 instruct vector_sqrt_HF_mem(vec dst, memory src)
25553 %{
25554 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25555 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25556 ins_encode %{
25557 int vlen_enc = vector_length_encoding(this);
25558 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25559 %}
25560 ins_pipe(pipe_slow);
25561 %}
25562
25563 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25564 %{
25565 match(Set dst (AddVHF src1 src2));
25566 match(Set dst (DivVHF src1 src2));
25567 match(Set dst (MulVHF src1 src2));
25568 match(Set dst (SubVHF src1 src2));
25569 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25570 ins_encode %{
25571 int vlen_enc = vector_length_encoding(this);
25572 int opcode = this->ideal_Opcode();
25573 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25574 %}
25575 ins_pipe(pipe_slow);
25576 %}
25577
25578
25579 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25580 %{
25581 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25582 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25583 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25584 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25585 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25586 ins_encode %{
25587 int vlen_enc = vector_length_encoding(this);
25588 int opcode = this->ideal_Opcode();
25589 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25590 %}
25591 ins_pipe(pipe_slow);
25592 %}
25593
25594 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25595 %{
25596 match(Set dst (FmaVHF src2 (Binary dst src1)));
25597 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25598 ins_encode %{
25599 int vlen_enc = vector_length_encoding(this);
25600 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25601 %}
25602 ins_pipe( pipe_slow );
25603 %}
25604
25605 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25606 %{
25607 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25608 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25609 ins_encode %{
25610 int vlen_enc = vector_length_encoding(this);
25611 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25612 %}
25613 ins_pipe( pipe_slow );
25614 %}
25615
25616 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25617 %{
25618 predicate(VM_Version::supports_avx10_2());
25619 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25620 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25621 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25622 ins_encode %{
25623 int vlen_enc = vector_length_encoding(this);
25624 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25625 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25626 %}
25627 ins_pipe( pipe_slow );
25628 %}
25629
25630 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25631 %{
25632 predicate(VM_Version::supports_avx10_2());
25633 match(Set dst (MinVHF src1 src2));
25634 match(Set dst (MaxVHF src1 src2));
25635 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25636 ins_encode %{
25637 int vlen_enc = vector_length_encoding(this);
25638 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25639 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25640 %}
25641 ins_pipe( pipe_slow );
25642 %}
25643
25644 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25645 %{
25646 predicate(!VM_Version::supports_avx10_2());
25647 match(Set dst (MinVHF src1 src2));
25648 match(Set dst (MaxVHF src1 src2));
25649 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25650 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25651 ins_encode %{
25652 int vlen_enc = vector_length_encoding(this);
25653 int opcode = this->ideal_Opcode();
25654 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25655 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25656 %}
25657 ins_pipe( pipe_slow );
25658 %}
25659
25660 //----------PEEPHOLE RULES-----------------------------------------------------
25661 // These must follow all instruction definitions as they use the names
25662 // defined in the instructions definitions.
25663 //
25664 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate holds
25666 //
25667 // peepmatch ( root_instr_name [preceding_instruction]* );
25668 //
25669 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file. The method has the
// // signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, functions which, when invoked, return a new
// // node defined in peepreplace, and the rules of the nodes appearing in the
// // corresponding peepmatch. The procedure returns true if successful, else
// // false (see the sketch after this list).
25678 //
25679 // peepconstraint %{
25680 // (instruction_number.operand_name relational_op instruction_number.operand_name
25681 // [, ...] );
25682 // // instruction numbers are zero-based using left to right order in peepmatch
25683 //
25684 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25685 // // provide an instruction_number.operand_name for each operand that appears
25686 // // in the replacement instruction's match rule
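//
// // As a hedged illustration only (names and body are made up; the real
// // procedures live in the architecture-dependent peephole file, e.g.
// // peephole_x86_64.cpp), a peepprocedure has roughly this shape:
// //
// //   bool Peephole::example_procedure(Block* block, int block_index,
// //                                    PhaseRegAlloc* ra_,
// //                                    MachNode* (*new_root)(), uint inst0_rule) {
// //     MachNode* inst0 = block->get_node(block_index)->as_Mach();
// //     if (inst0->rule() != inst0_rule) return false; // not the matched shape
// //     // ... verify anything peepconstraint cannot express ...
// //     MachNode* replacement = new_root(); // the node named in peepreplace
// //     // wire up its operands and splice it into the block
// //     return true; // only when the rewrite was actually performed
// //   }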
25687 //
25688 // ---------VM FLAGS---------------------------------------------------------
25689 //
25690 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25691 //
25692 // Each peephole rule is given an identifying number starting with zero and
25693 // increasing by one in the order seen by the parser. An individual peephole
25694 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25695 // on the command-line.
25696 //
25697 // ---------CURRENT LIMITATIONS----------------------------------------------
25698 //
// Only transformations inside a basic block (do we need more for peephole?)
25700 //
25701 // ---------EXAMPLE----------------------------------------------------------
25702 //
25703 // // pertinent parts of existing instructions in architecture description
25704 // instruct movI(rRegI dst, rRegI src)
25705 // %{
25706 // match(Set dst (CopyI src));
25707 // %}
25708 //
25709 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25710 // %{
25711 // match(Set dst (AddI dst src));
25712 // effect(KILL cr);
25713 // %}
25714 //
25715 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25716 // %{
25717 // match(Set dst (AddI dst src));
25718 // %}
25719 //
25720 // 1. Simple replacement
25721 // - Only match adjacent instructions in same basic block
25722 // - Only equality constraints
25723 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25724 // - Only one replacement instruction
25725 //
25726 // // Change (inc mov) to lea
25727 // peephole %{
25728 // // lea should only be emitted when beneficial
25729 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25730 // // increment preceded by register-register move
25731 // peepmatch ( incI_rReg movI );
25732 // // require that the destination register of the increment
25733 // // match the destination register of the move
25734 // peepconstraint ( 0.dst == 1.dst );
25735 // // construct a replacement instruction that sets
25736 // // the destination to ( move's source register + one )
25737 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25738 // %}
25739 //
25740 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25742 // - More flexible constraints
25743 // - More flexible transformations
25744 // - May utilise architecture-dependent API more effectively
25745 // - Currently only one replacement instruction due to adlc parsing capabilities
25746 //
25747 // // Change (inc mov) to lea
25748 // peephole %{
25749 // // lea should only be emitted when beneficial
25750 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the matched nodes are passed into the function below
25752 // peepmatch ( incI_rReg movI );
25753 // // the method that takes the responsibility of transformation
25754 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
// // this node is passed into the function above
25757 // peepreplace ( leaI_rReg_immI() );
25758 // %}
25759
// These instructions are not matched by the matcher but are used by the peephole rules below.
25761 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25762 %{
25763 predicate(false);
25764 match(Set dst (AddI src1 src2));
25765 format %{ "leal $dst, [$src1 + $src2]" %}
25766 ins_encode %{
25767 Register dst = $dst$$Register;
25768 Register src1 = $src1$$Register;
25769 Register src2 = $src2$$Register;
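    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte, so prefer the other operand as the base when possible.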
25770 if (src1 != rbp && src1 != r13) {
25771 __ leal(dst, Address(src1, src2, Address::times_1));
25772 } else {
25773 assert(src2 != rbp && src2 != r13, "");
25774 __ leal(dst, Address(src2, src1, Address::times_1));
25775 }
25776 %}
25777 ins_pipe(ialu_reg_reg);
25778 %}
25779
25780 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25781 %{
25782 predicate(false);
25783 match(Set dst (AddI src1 src2));
25784 format %{ "leal $dst, [$src1 + $src2]" %}
25785 ins_encode %{
25786 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25787 %}
25788 ins_pipe(ialu_reg_reg);
25789 %}
25790
25791 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25792 %{
25793 predicate(false);
25794 match(Set dst (LShiftI src shift));
25795 format %{ "leal $dst, [$src << $shift]" %}
25796 ins_encode %{
25797 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25798 Register src = $src$$Register;
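    // A shift by 1 can be emitted as src + src (times_1), avoiding the 4-byte
    // displacement that the no-base [index*scale] form requires; rbp and r13
    // cannot serve as the base of that form.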
25799 if (scale == Address::times_2 && src != rbp && src != r13) {
25800 __ leal($dst$$Register, Address(src, src, Address::times_1));
25801 } else {
25802 __ leal($dst$$Register, Address(noreg, src, scale));
25803 }
25804 %}
25805 ins_pipe(ialu_reg_reg);
25806 %}
25807
25808 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25809 %{
25810 predicate(false);
25811 match(Set dst (AddL src1 src2));
25812 format %{ "leaq $dst, [$src1 + $src2]" %}
25813 ins_encode %{
25814 Register dst = $dst$$Register;
25815 Register src1 = $src1$$Register;
25816 Register src2 = $src2$$Register;
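    // Same base-register constraint as in leaI_rReg_rReg_peep above.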
25817 if (src1 != rbp && src1 != r13) {
25818 __ leaq(dst, Address(src1, src2, Address::times_1));
25819 } else {
25820 assert(src2 != rbp && src2 != r13, "");
25821 __ leaq(dst, Address(src2, src1, Address::times_1));
25822 }
25823 %}
25824 ins_pipe(ialu_reg_reg);
25825 %}
25826
25827 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25828 %{
25829 predicate(false);
25830 match(Set dst (AddL src1 src2));
25831 format %{ "leaq $dst, [$src1 + $src2]" %}
25832 ins_encode %{
25833 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25834 %}
25835 ins_pipe(ialu_reg_reg);
25836 %}
25837
25838 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25839 %{
25840 predicate(false);
25841 match(Set dst (LShiftL src shift));
25842 format %{ "leaq $dst, [$src << $shift]" %}
25843 ins_encode %{
25844 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25845 Register src = $src$$Register;
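    // Same encoding considerations as in leaI_rReg_immI2_peep above.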
25846 if (scale == Address::times_2 && src != rbp && src != r13) {
25847 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25848 } else {
25849 __ leaq($dst$$Register, Address(noreg, src, scale));
25850 }
25851 %}
25852 ins_pipe(ialu_reg_reg);
25853 %}
25854
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally only
// beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
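// For example (illustrative register assignments), the matched pair
//   movl  ebx, eax
//   addl  ebx, ecx
// can be rewritten by lea_coalesce_reg into the single instruction
//   leal  ebx, [eax + ecx]
// eliminating the separate move.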
25861
25862 peephole
25863 %{
25864 peeppredicate(VM_Version::supports_fast_2op_lea());
25865 peepmatch (addI_rReg);
25866 peepprocedure (lea_coalesce_reg);
25867 peepreplace (leaI_rReg_rReg_peep());
25868 %}
25869
25870 peephole
25871 %{
25872 peeppredicate(VM_Version::supports_fast_2op_lea());
25873 peepmatch (addI_rReg_imm);
25874 peepprocedure (lea_coalesce_imm);
25875 peepreplace (leaI_rReg_immI_peep());
25876 %}
25877
25878 peephole
25879 %{
25880 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25881 VM_Version::is_intel_cascade_lake());
25882 peepmatch (incI_rReg);
25883 peepprocedure (lea_coalesce_imm);
25884 peepreplace (leaI_rReg_immI_peep());
25885 %}
25886
25887 peephole
25888 %{
25889 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25890 VM_Version::is_intel_cascade_lake());
25891 peepmatch (decI_rReg);
25892 peepprocedure (lea_coalesce_imm);
25893 peepreplace (leaI_rReg_immI_peep());
25894 %}
25895
25896 peephole
25897 %{
25898 peeppredicate(VM_Version::supports_fast_2op_lea());
25899 peepmatch (salI_rReg_immI2);
25900 peepprocedure (lea_coalesce_imm);
25901 peepreplace (leaI_rReg_immI2_peep());
25902 %}
25903
25904 peephole
25905 %{
25906 peeppredicate(VM_Version::supports_fast_2op_lea());
25907 peepmatch (addL_rReg);
25908 peepprocedure (lea_coalesce_reg);
25909 peepreplace (leaL_rReg_rReg_peep());
25910 %}
25911
25912 peephole
25913 %{
25914 peeppredicate(VM_Version::supports_fast_2op_lea());
25915 peepmatch (addL_rReg_imm);
25916 peepprocedure (lea_coalesce_imm);
25917 peepreplace (leaL_rReg_immL32_peep());
25918 %}
25919
25920 peephole
25921 %{
25922 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25923 VM_Version::is_intel_cascade_lake());
25924 peepmatch (incL_rReg);
25925 peepprocedure (lea_coalesce_imm);
25926 peepreplace (leaL_rReg_immL32_peep());
25927 %}
25928
25929 peephole
25930 %{
25931 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25932 VM_Version::is_intel_cascade_lake());
25933 peepmatch (decL_rReg);
25934 peepprocedure (lea_coalesce_imm);
25935 peepreplace (leaL_rReg_immL32_peep());
25936 %}
25937
25938 peephole
25939 %{
25940 peeppredicate(VM_Version::supports_fast_2op_lea());
25941 peepmatch (salL_rReg_immI2);
25942 peepprocedure (lea_coalesce_imm);
25943 peepreplace (leaL_rReg_immI2_peep());
25944 %}
25945
25946 peephole
25947 %{
25948 peepmatch (leaPCompressedOopOffset);
25949 peepprocedure (lea_remove_redundant);
25950 %}
25951
25952 peephole
25953 %{
25954 peepmatch (leaP8Narrow);
25955 peepprocedure (lea_remove_redundant);
25956 %}
25957
25958 peephole
25959 %{
25960 peepmatch (leaP32Narrow);
25961 peepprocedure (lea_remove_redundant);
25962 %}
25963
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only
// use flags that were already set by the preceding instruction.
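//
// For example (illustrative), in the sequence
//   andl  ecx, edx
//   testl ecx, ecx
//   jne   done
// the testl is redundant: andl already sets ZF and SF from the same result,
// so the jne can consume the flags produced by the andl directly.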
25966
// int variant
25968 peephole
25969 %{
25970 peepmatch (testI_reg);
25971 peepprocedure (test_may_remove);
25972 %}
25973
// long variant
25975 peephole
25976 %{
25977 peepmatch (testL_reg);
25978 peepprocedure (test_may_remove);
25979 %}
25980
25981
25982 //----------SMARTSPILL RULES---------------------------------------------------
25983 // These must follow all instruction definitions as they use the names
25984 // defined in the instructions definitions.