1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding, VMReg );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg()); // base slot; args: register save type, C convention save type, ideal type, encoding, VMReg
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next()); // companion slot: same encoding, VMReg is ->next() (presumably the upper half -- confirm)
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg()); // SOC register save type, SOE under the C convention
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg()); // NS (no-save) in both columns
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg()); // NS register save type, SOE under the C convention
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90 // _WIN64 builds: RSI and RDI are SOC for the register save type but SOE under the C convention.
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98 // All other builds: RSI and RDI are SOC in both columns.
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg()); // R8-R11: SOC in both columns
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg()); // R12-R15: SOC register save type, SOE under the C convention
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg()); // R16-R31 (encodings 16-31): SOC in both columns
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Allocation order of the general registers (XMM register definitions follow further below)
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
189 alloc_class chunk0(R10, R10_H, // allocation order for the general registers; first listed = highest priority
190 R11, R11_H, // every register is immediately followed by its _H companion, keeping pairs adjacent
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H); // RSP is listed last, i.e. lowest priority
221
222 // XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No register preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); // XMM0 word a: base VMReg slot, encoding 0, SOC in both columns
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM0
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); // XMM1 word a: base VMReg slot, encoding 1, SOC in both columns
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM1
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); // XMM2 word a: base VMReg slot, encoding 2, SOC in both columns
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM2
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); // XMM3 word a: base VMReg slot, encoding 3, SOC in both columns
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM3
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); // XMM4 word a: base VMReg slot, encoding 4, SOC in both columns
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM4
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); // XMM5 word a: base VMReg slot, encoding 5, SOC in both columns
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM5
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); // XMM6 word a: base VMReg slot, encoding 6, SOC in both columns
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); // NOTE(review): the ABI comment earlier in this file says Windows preserves XMM6-XMM15, yet the C convention column is SOC here on every platform -- confirm this is intended
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM6
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); // XMM7 word a: base VMReg slot, encoding 7, SOC in both columns
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM7
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); // XMM8 word a: base VMReg slot, encoding 8, SOC in both columns
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM8
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); // XMM9 word a: base VMReg slot, encoding 9, SOC in both columns
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM9
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); // XMM10 word a: base VMReg slot, encoding 10, SOC in both columns
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM10
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); // XMM11 word a: base VMReg slot, encoding 11, SOC in both columns
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM11
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); // XMM12 word a: base VMReg slot, encoding 12, SOC in both columns
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM12
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); // XMM13 word a: base VMReg slot, encoding 13, SOC in both columns
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM13
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); // XMM14 word a: base VMReg slot, encoding 14, SOC in both columns
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM14
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); // XMM15 word a: base VMReg slot, encoding 15, SOC in both columns
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM15
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); // XMM16 word a: base VMReg slot, encoding 16, SOC in both columns
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM16
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); // XMM17 word a: base VMReg slot, encoding 17, SOC in both columns
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM17
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); // XMM18 word a: base VMReg slot, encoding 18, SOC in both columns
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM18
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); // XMM19 word a: base VMReg slot, encoding 19, SOC in both columns
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); // word p: last of the 16 slots of XMM19
576
// XMM20: sixteen allocator slots (XMM20, XMM20b..XMM20p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 20. Slot k maps to the k-th VMReg
// following xmm20's base VMReg.
reg_def XMM20 (SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
// XMM21: sixteen allocator slots (XMM21, XMM21b..XMM21p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 21. Slot k maps to the k-th VMReg
// following xmm21's base VMReg.
reg_def XMM21 (SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
// XMM22: sixteen allocator slots (XMM22, XMM22b..XMM22p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 22. Slot k maps to the k-th VMReg
// following xmm22's base VMReg.
reg_def XMM22 (SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
// XMM23: sixteen allocator slots (XMM23, XMM23b..XMM23p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 23. Slot k maps to the k-th VMReg
// following xmm23's base VMReg.
reg_def XMM23 (SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
// XMM24: sixteen allocator slots (XMM24, XMM24b..XMM24p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 24. Slot k maps to the k-th VMReg
// following xmm24's base VMReg.
reg_def XMM24 (SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
// XMM25: sixteen allocator slots (XMM25, XMM25b..XMM25p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 25. Slot k maps to the k-th VMReg
// following xmm25's base VMReg.
reg_def XMM25 (SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
// XMM26: sixteen allocator slots (XMM26, XMM26b..XMM26p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 26. Slot k maps to the k-th VMReg
// following xmm26's base VMReg.
reg_def XMM26 (SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
// XMM27: sixteen allocator slots (XMM27, XMM27b..XMM27p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 27. Slot k maps to the k-th VMReg
// following xmm27's base VMReg.
reg_def XMM27 (SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
// XMM28: sixteen allocator slots (XMM28, XMM28b..XMM28p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 28. Slot k maps to the k-th VMReg
// following xmm28's base VMReg.
reg_def XMM28 (SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
// XMM29: sixteen allocator slots (XMM29, XMM29b..XMM29p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 29. Slot k maps to the k-th VMReg
// following xmm29's base VMReg.
reg_def XMM29 (SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
// XMM30: sixteen allocator slots (XMM30, XMM30b..XMM30p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 30. Slot k maps to the k-th VMReg
// following xmm30's base VMReg.
reg_def XMM30 (SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
// XMM31: sixteen allocator slots (XMM31, XMM31b..XMM31p), each Save-On-Call,
// ideal type Op_RegF, hardware encoding 31. Slot k maps to the k-th VMReg
// following xmm31's base VMReg.
reg_def XMM31 (SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
// Flags register: Save-On-Call, ideal register type 0, encoding 16, and
// VMRegImpl::Bad() in the VMReg position (no concrete VMReg is supplied).
reg_def RFLAGS (SOC, SOC, 0, 16, VMRegImpl::Bad());
782
// AVX3 mask registers K1 through K7.
// Each one is described by two Save-On-Call Op_RegI slots: Kn on the
// register's own VMReg and Kn_H on the VMReg that follows it.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
// Register class that contains no registers at all.
reg_class no_reg( );
815
// Every pointer/long general-purpose register, listed as (low, _H) slot pairs.
// Covers RAX..R15 (RSP and R15 included) plus the APX extended GPRs R16..R31.
reg_class all_reg(RAX, RAX_H,  RDX, RDX_H,  RBP, RBP_H,  RDI, RDI_H,
                  RSI, RSI_H,  RCX, RCX_H,  RBX, RBX_H,  RSP, RSP_H,
                  R8,  R8_H,   R9,  R9_H,   R10, R10_H,  R11, R11_H,
                  R12, R12_H,  R13, R13_H,  R14, R14_H,  R15, R15_H,
                  R16, R16_H,  R17, R17_H,  R18, R18_H,  R19, R19_H,
                  R20, R20_H,  R21, R21_H,  R22, R22_H,  R23, R23_H,
                  R24, R24_H,  R25, R25_H,  R26, R26_H,  R27, R27_H,
                  R28, R28_H,  R29, R29_H,  R30, R30_H,  R31, R31_H);
849
// Class for all int registers including APX extended GPRs.
// Only the low slot of each general-purpose register is listed (no _H halves).
// Unlike all_reg, this class contains neither RSP nor R15.
// NOTE(review): presumably because those two are reserved (stack pointer and
// the TLS pointer, see ptr_rsp_reg / ptr_r15_reg) - confirm before adding them.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);
881
// Pointer register classes. Each body returns a register mask
// (_..._mask) that is defined outside this section.

// All pointer registers.
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Pointer registers other than RSP.
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Pointer registers other than RSP and RBP.
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Pointer registers other than RAX and RSP.
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Pointer registers other than RAX, RBX and RSP.
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}
906
// Long register classes. Each body returns a register mask
// (_..._mask) that is defined outside this section.

// Long registers other than RSP.
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Long registers other than RAX, RDX and RSP.
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Long registers other than RCX and RSP.
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Long registers other than RBP and R13.
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}
926
// Int register classes. Each body returns a register mask
// (_..._mask) that is defined outside this section.

// Int registers other than RSP.
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Int registers other than RAX, RDX and RSP.
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Int registers other than RCX and RSP.
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Int registers other than RBP and R13.
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
946
// Single-register pointer classes: each holds one GPR as its (low, _H) pair.

// RAX as a pointer
reg_class ptr_rax_reg(RAX, RAX_H);

// RBX as a pointer
reg_class ptr_rbx_reg(RBX, RBX_H);

// RSI as a pointer
reg_class ptr_rsi_reg(RSI, RSI_H);

// RBP as a pointer
reg_class ptr_rbp_reg(RBP, RBP_H);

// RDI as a pointer
reg_class ptr_rdi_reg(RDI, RDI_H);

// RSP, the stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// R15, the TLS pointer
reg_class ptr_r15_reg(R15, R15_H);
967
// Single-register long classes: each holds one GPR as its (low, _H) pair.

// RAX as a long
reg_class long_rax_reg(RAX, RAX_H);

// RCX as a long
reg_class long_rcx_reg(RCX, RCX_H);

// RDX as a long
reg_class long_rdx_reg(RDX, RDX_H);

// R11 as a long
reg_class long_r11_reg(R11, R11_H);
979
// Single-register int classes: each holds only the low slot of one GPR.

// RAX as an int
reg_class int_rax_reg(RAX);

// RBX as an int
reg_class int_rbx_reg(RBX);

// RCX as an int
reg_class int_rcx_reg(RCX);

// RDX as an int
reg_class int_rdx_reg(RDX);

// RDI as an int
reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
// Allocation class holding every slot (base, b..p) of XMM0 through XMM31.
// NOTE(review): the listing order is kept exactly as before; it presumably
// matters to the register allocator - confirm before reordering.
alloc_class chunk1(
    XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
// Allocation class for the mask registers, listed from K7 down to K1.
// NOTE(review): descending order is kept as-is; presumably significant to
// the allocator - confirm before reordering.
alloc_class chunk2(K7, K7_H,  K6, K6_H,  K5, K5_H,  K4, K4_H,
                   K3, K3_H,  K2, K2_H,  K1, K1_H);
1038
// Class containing all mask registers K1..K7 (both slots of each).
reg_class vectmask_reg(K1, K1_H,  K2, K2_H,  K3, K3_H,  K4, K4_H,
                       K5, K5_H,  K6, K6_H,  K7, K7_H);

// One single-register class per mask register.
reg_class vectmask_reg_K1( K1, K1_H );
reg_class vectmask_reg_K2( K2, K2_H );
reg_class vectmask_reg_K3( K3, K3_H );
reg_class vectmask_reg_K4( K4, K4_H );
reg_class vectmask_reg_K5( K5, K5_H );
reg_class vectmask_reg_K6( K6, K6_H );
reg_class vectmask_reg_K7( K7, K7_H );
1054
// The allocation class for the flags register must stay the last one.
alloc_class chunk3( RFLAGS );

// Condition codes: a class holding only RFLAGS.
reg_class int_flags( RFLAGS );
1060
// Float registers available before EVEX: first slot of XMM0..XMM15.
reg_class float_reg_legacy(XMM0,  XMM1,  XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
                           XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15);
1078
// Float registers available with EVEX: first slot of XMM0..XMM31.
reg_class float_reg_evex(XMM0,  XMM1,  XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
                         XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
                         XMM16, XMM17, XMM18, XMM19, XMM20, XMM21, XMM22, XMM23,
                         XMM24, XMM25, XMM26, XMM27, XMM28, XMM29, XMM30, XMM31);
1112
// Float classes chosen at runtime: the evex class when the predicate holds,
// the legacy class otherwise. float_reg_vl additionally requires AVX512VL.
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy,
                            %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy,
                               %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Double registers available before EVEX: first two slots of XMM0..XMM15.
reg_class double_reg_legacy(XMM0,  XMM0b,   XMM1,  XMM1b,   XMM2,  XMM2b,   XMM3,  XMM3b,
                            XMM4,  XMM4b,   XMM5,  XMM5b,   XMM6,  XMM6b,   XMM7,  XMM7b,
                            XMM8,  XMM8b,   XMM9,  XMM9b,   XMM10, XMM10b,  XMM11, XMM11b,
                            XMM12, XMM12b,  XMM13, XMM13b,  XMM14, XMM14b,  XMM15, XMM15b);
1133
// Double registers available with EVEX: first two slots of XMM0..XMM31.
reg_class double_reg_evex(XMM0,  XMM0b,   XMM1,  XMM1b,   XMM2,  XMM2b,   XMM3,  XMM3b,
                          XMM4,  XMM4b,   XMM5,  XMM5b,   XMM6,  XMM6b,   XMM7,  XMM7b,
                          XMM8,  XMM8b,   XMM9,  XMM9b,   XMM10, XMM10b,  XMM11, XMM11b,
                          XMM12, XMM12b,  XMM13, XMM13b,  XMM14, XMM14b,  XMM15, XMM15b,
                          XMM16, XMM16b,  XMM17, XMM17b,  XMM18, XMM18b,  XMM19, XMM19b,
                          XMM20, XMM20b,  XMM21, XMM21b,  XMM22, XMM22b,  XMM23, XMM23b,
                          XMM24, XMM24b,  XMM25, XMM25b,  XMM26, XMM26b,  XMM27, XMM27b,
                          XMM28, XMM28b,  XMM29, XMM29b,  XMM30, XMM30b,  XMM31, XMM31b);
1167
// Double classes chosen at runtime: the evex class when the predicate holds,
// the legacy class otherwise. double_reg_vl additionally requires AVX512VL.
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy,
                             %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy,
                                %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// 32-bit vector registers available before EVEX: first slot of XMM0..XMM15.
reg_class vectors_reg_legacy(XMM0,  XMM1,  XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
                             XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15);
1188
// 32-bit vector registers available with EVEX: first slot of XMM0..XMM31.
reg_class vectors_reg_evex(XMM0,  XMM1,  XMM2,  XMM3,  XMM4,  XMM5,  XMM6,  XMM7,
                           XMM8,  XMM9,  XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
                           XMM16, XMM17, XMM18, XMM19, XMM20, XMM21, XMM22, XMM23,
                           XMM24, XMM25, XMM26, XMM27, XMM28, XMM29, XMM30, XMM31);
1222
// 32-bit vector classes chosen at runtime: the evex class when the predicate
// holds, the legacy class otherwise.
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// 64-bit vector registers, legacy set: first two slots of XMM0..XMM15.
reg_class vectord_reg_legacy(XMM0,  XMM0b,   XMM1,  XMM1b,   XMM2,  XMM2b,   XMM3,  XMM3b,
                             XMM4,  XMM4b,   XMM5,  XMM5b,   XMM6,  XMM6b,   XMM7,  XMM7b,
                             XMM8,  XMM8b,   XMM9,  XMM9b,   XMM10, XMM10b,  XMM11, XMM11b,
                             XMM12, XMM12b,  XMM13, XMM13b,  XMM14, XMM14b,  XMM15, XMM15b);
1243
// 64-bit vector registers, EVEX set: first two slots of XMM0..XMM31.
reg_class vectord_reg_evex(XMM0,  XMM0b,   XMM1,  XMM1b,   XMM2,  XMM2b,   XMM3,  XMM3b,
                           XMM4,  XMM4b,   XMM5,  XMM5b,   XMM6,  XMM6b,   XMM7,  XMM7b,
                           XMM8,  XMM8b,   XMM9,  XMM9b,   XMM10, XMM10b,  XMM11, XMM11b,
                           XMM12, XMM12b,  XMM13, XMM13b,  XMM14, XMM14b,  XMM15, XMM15b,
                           XMM16, XMM16b,  XMM17, XMM17b,  XMM18, XMM18b,  XMM19, XMM19b,
                           XMM20, XMM20b,  XMM21, XMM21b,  XMM22, XMM22b,  XMM23, XMM23b,
                           XMM24, XMM24b,  XMM25, XMM25b,  XMM26, XMM26b,  XMM27, XMM27b,
                           XMM28, XMM28b,  XMM29, XMM29b,  XMM30, XMM30b,  XMM31, XMM31b);
1277
// 64-bit vector classes chosen at runtime: the evex class when the predicate
// holds, the legacy class otherwise.
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// 128-bit vector registers, legacy set: first four slots of XMM0..XMM15.
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,   XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,   XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,   XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,   XMM7,  XMM7b,  XMM7c,  XMM7d,
                             XMM8,  XMM8b,  XMM8c,  XMM8d,   XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,  XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,  XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,  XMM15, XMM15b, XMM15c, XMM15d);
1298
// 128-bit vector registers, EVEX set: first four slots of XMM0..XMM31.
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,   XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,   XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,   XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,   XMM7,  XMM7b,  XMM7c,  XMM7d,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,   XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,  XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,  XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,  XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,  XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,  XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,  XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,  XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,  XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,  XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,  XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,  XMM31, XMM31b, XMM31c, XMM31d);
1332
// 128-bit vector classes chosen at runtime: the evex class when the predicate
// holds, the legacy class otherwise.
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// 256-bit vector registers, legacy set: first eight slots of XMM0..XMM15.
reg_class vectory_reg_legacy(
    XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for 256bit vector registers, EVEX variant: XMM0-XMM31.
// Each register is listed with 8 names (the base name plus suffixes b-h).
// Used as the first class of the dynamic classes vectory_reg and
// vectory_reg_vlbwdq.
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
    XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
    XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
    XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
    XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
    XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
    XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
    XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
    XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
    XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
// Dynamic 256bit classes: each one names vectory_reg_evex first and
// vectory_reg_legacy second, followed by the VM_Version predicate used to
// choose between them.
//   vectory_reg        - keyed on VM_Version::supports_evex()
//   vectory_reg_vlbwdq - keyed on VM_Version::supports_avx512vlbwdq()
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for all 512bit vector registers: XMM0-XMM31.
// Each register is listed with 16 names (the base name plus suffixes b-p).
// Used as the first class of the dynamic classes vectorz_reg and
// vectorz_reg_vl.
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
    XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
    XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
    XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
    XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
    XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
    XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
    XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
    XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
    XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512bit vector registers: XMM0-XMM15 only.
// Each register is listed with 16 names (the base name plus suffixes b-p).
// Used as the second class of the dynamic classes vectorz_reg and
// vectorz_reg_vl.
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
    XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
    XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
    XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
    XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
    XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
    XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
    XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
    XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
    XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
// Dynamic 512bit classes: each one names vectorz_reg_evex first and
// vectorz_reg_legacy second, followed by the VM_Version predicate used to
// choose between them.
//   vectorz_reg    - keyed on VM_Version::supports_evex()
//   vectorz_reg_vl - keyed on supports_evex() && supports_avx512vl()
reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
// Class containing only XMM0, listed with its four names XMM0 and XMM0b-XMM0d.
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
// Declaration only; the definition lives in the following source block.
// Takes a node that must be a CastLL (asserted by the definition) and tells
// whether the bounds of its long type qualify as 32-bit signed immediates.
bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
// Register masks that are computed at runtime. They are defined in the
// following source block and filled in by reg_mask_init().

// General purpose / pointer-sized masks.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
// Long masks (start out as a copy of _PTR_REG_mask in reg_mask_init()).
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
// Int masks (derived from _ALL_INT_REG_mask in reg_mask_init()).
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
// Float mask (EVEX or legacy variant, picked in reg_mask_init()).
extern RegMask _FLOAT_REG_mask;

// The corresponding register mask OR-ed with STACK_OR_STACK_SLOTS_mask().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Read-only accessors for the three STACK_OR_* masks.
inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
// Short aliases for Assembler::imm_operand and Assembler::disp32_operand.
#define RELOC_IMM64 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Emitter shorthand: "__ insn(...)" expands to "masm->insn(...)", so it can
// only be used where a variable named masm is in scope.
#define __ masm->
1506
// Storage for the runtime-computed register masks. All of them are
// default-constructed here and assigned their contents in reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
// This could be in MacroAssembler but it's fairly C2 specific
//
// Emits a flag fix-up to be placed after a floating point compare.
// When PF is clear the emitted code jumps straight to the end; otherwise it
// pushes the flags, masks the saved copy on the stack and pops it back.
static void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  // Nothing to fix unless the parity flag is set.
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C| (r - reserved bit)
  // 0 0 1 0 1 0 1 1 (0x2B)
  //
  // The mask is applied to the flags image that pushf left at [rsp].
  __ andq(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}
1700
// Emits the tail of a three-way floating point compare, to be placed after
// the compare instruction has set the flags.
// dst is first loaded with -1; if the 'below' condition holds that value is
// kept, otherwise dst is overwritten by setcc(notEqual).
// NOTE(review): presumably setcc leaves 1 for "not equal" and 0 for "equal",
// giving a -1/0/1 result - confirm against MacroAssembler::setcc.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // If any floating point comparison instruction is used, unordered case always triggers jump
  // for below condition, CF=1 is true when at least one input is NaN
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::below, done);
  __ setcc(Assembler::notEqual, dst);
  __ bind(done);
}
1710
// Floating point precision selector used by emit_fp_ucom(), movfp() and
// emit_fp_min_max() to pick the instruction variant.
enum FP_PREC {
  fp_prec_hlf,  // half precision: evucomish / movhlf
  fp_prec_flt,  // single precision: ucomiss / movflt
  fp_prec_dbl   // double precision: ucomisd / movdbl
};
1716
1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1718 XMMRegister p, XMMRegister q) {
1719 if (pt == fp_prec_hlf) {
1720 __ evucomish(p, q);
1721 } else if (pt == fp_prec_flt) {
1722 __ ucomiss(p, q);
1723 } else {
1724 __ ucomisd(p, q);
1725 }
1726 }
1727
1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1729 XMMRegister dst, XMMRegister src, Register scratch) {
1730 if (pt == fp_prec_hlf) {
1731 __ movhlf(dst, src, scratch);
1732 } else if (pt == fp_prec_flt) {
1733 __ movflt(dst, src);
1734 } else {
1735 __ movdbl(dst, src);
1736 }
1737 }
1738
1739 // Math.min() # Math.max()
1740 // -----------------------------
1741 // (v)ucomis[h/s/d] #
1742 // ja -> b # a
1743 // jp -> NaN # NaN
1744 // jb -> a # b
1745 // je -> a | b # a & b
1746 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1747 XMMRegister a, XMMRegister b, Register rt,
1748 bool min, enum FP_PREC pt) {
1749 Label nan, zero, below, above, done;
1750
1751 emit_fp_ucom(masm, pt, a, b);
1752
1753 if (dst->encoding() != (min ? b : a)->encoding()) {
1754 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1755 } else {
1756 __ jccb(Assembler::above, done);
1757 }
1758 __ jccb(Assembler::parity, nan); // PF=1
1759 __ jccb(Assembler::below, below); // CF=1
1760
1761 // equal
1762 // Using bitwise operations is a low cost way to compute the correct result
1763 // for zero and non-zero inputs in this scenario except for NaN, which is
1764 // handled separately. The mantissa and exponent are valid with either
1765 // bitwise operation. For zero inputs, the sign bit is chosen according to
1766 // whether a minimum or maximum value is required.
1767 if (min) {
1768 // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
1769 __ vpor(dst, a, b, Assembler::AVX_128bit);
1770 } else {
1771 // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
1772 __ vpand(dst, a, b, Assembler::AVX_128bit);
1773 }
1774 __ jmp(done);
1775
1776 __ bind(above);
1777 movfp(masm, pt, dst, min ? b : a, rt);
1778 __ jmp(done);
1779
1780 __ bind(nan);
1781 if (pt == fp_prec_hlf) {
1782 __ movl(rt, 0x00007e00); // Float16.NaN
1783 __ evmovw(dst, rt);
1784 } else if (pt == fp_prec_flt) {
1785 __ movl(rt, 0x7fc00000); // Float.NaN
1786 __ movdl(dst, rt);
1787 } else {
1788 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1789 __ movdq(dst, rt);
1790 }
1791 __ jmp(done);
1792
1793 __ bind(below);
1794 movfp(masm, pt, dst, min ? a : b, rt);
1795
1796 __ bind(done);
1797 }
1798
1799 //=============================================================================
// The output register mask of MachConstantBaseNode is bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1801
// Offset of the constant table base; always 0 on this platform because the
// table is addressed absolutely.
int ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}
1805
// MachConstantBaseNode never asks for post-allocation expansion ...
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// ... so postalloc_expand() must never be called.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
1810
// Emits no code; consistent with size() returning 0.
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
1814
// Code size of the node: 0, since emit() produces nothing.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
1818
#ifndef PRODUCT
// Non-product only: prints a one-line placeholder for the empty encoding.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
1824
1825
1826 //=============================================================================
1827 #ifndef PRODUCT
1828 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1829 Compile* C = ra_->C;
1830
1831 int framesize = C->output()->frame_size_in_bytes();
1832 int bangsize = C->output()->bang_size_in_bytes();
1833 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1834 // Remove wordSize for return addr which is already pushed.
1835 framesize -= wordSize;
1836
1837 if (C->output()->need_stack_bang(bangsize)) {
1838 framesize -= wordSize;
1839 st->print("# stack bang (%d bytes)", bangsize);
1840 st->print("\n\t");
1841 st->print("pushq rbp\t# Save rbp");
1842 if (PreserveFramePointer) {
1843 st->print("\n\t");
1844 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1845 }
1846 if (framesize) {
1847 st->print("\n\t");
1848 st->print("subq rsp, #%d\t# Create frame",framesize);
1849 }
1850 } else {
1851 st->print("subq rsp, #%d\t# Create frame",framesize);
1852 st->print("\n\t");
1853 framesize -= wordSize;
1854 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1855 if (PreserveFramePointer) {
1856 st->print("\n\t");
1857 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1858 if (framesize > 0) {
1859 st->print("\n\t");
1860 st->print("addq rbp, #%d", framesize);
1861 }
1862 }
1863 }
1864
1865 if (VerifyStackAtCalls) {
1866 st->print("\n\t");
1867 framesize -= wordSize;
1868 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1869 #ifdef ASSERT
1870 st->print("\n\t");
1871 st->print("# stack alignment check");
1872 #endif
1873 }
1874 if (C->stub_function() != nullptr) {
1875 st->print("\n\t");
1876 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1877 st->print("\n\t");
1878 st->print("je fast_entry\t");
1879 st->print("\n\t");
1880 st->print("call #nmethod_entry_barrier_stub\t");
1881 st->print("\n\tfast_entry:");
1882 }
1883 st->cr();
1884 }
1885 #endif
1886
1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1888 Compile* C = ra_->C;
1889
1890 int framesize = C->output()->frame_size_in_bytes();
1891 int bangsize = C->output()->bang_size_in_bytes();
1892
1893 if (C->clinit_barrier_on_entry()) {
1894 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1895 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1896
1897 Label L_skip_barrier;
1898 Register klass = rscratch1;
1899
1900 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1901 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1902
1903 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1904
1905 __ bind(L_skip_barrier);
1906 }
1907
1908 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1909
1910 C->output()->set_frame_complete(__ offset());
1911
1912 if (C->has_mach_constant_base_node()) {
1913 // NOTE: We set the table base offset here because users might be
1914 // emitted before MachConstantBaseNode.
1915 ConstantTable& constant_table = C->output()->constant_table();
1916 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1917 }
1918 }
1919
// Size of the prolog in bytes. Delegates to the generic MachNode::size()
// instead of computing the size by hand.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
  // the hard way
}
1925
// Relocation count reported for the prolog.
// NOTE(review): the inherited remark says "a large enough number" although
// the value returned is 0 - confirm that 0 is intended.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1930
1931 //=============================================================================
1932 #ifndef PRODUCT
1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1934 {
1935 Compile* C = ra_->C;
1936 if (generate_vzeroupper(C)) {
1937 st->print("vzeroupper");
1938 st->cr(); st->print("\t");
1939 }
1940
1941 int framesize = C->output()->frame_size_in_bytes();
1942 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1943 // Remove word for return adr already pushed
1944 // and RBP
1945 framesize -= 2*wordSize;
1946
1947 if (framesize) {
1948 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1949 st->print("\t");
1950 }
1951
1952 st->print_cr("popq rbp");
1953 if (do_polling() && C->is_method_compilation()) {
1954 st->print("\t");
1955 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1956 "ja #safepoint_stub\t"
1957 "# Safepoint: poll for GC");
1958 }
1959 }
1960 #endif
1961
1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1963 {
1964 Compile* C = ra_->C;
1965
1966 if (generate_vzeroupper(C)) {
1967 // Clear upper bits of YMM registers when current compiled code uses
1968 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1969 __ vzeroupper();
1970 }
1971
1972 int framesize = C->output()->frame_size_in_bytes();
1973 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1974 // Remove word for return adr already pushed
1975 // and RBP
1976 framesize -= 2*wordSize;
1977
1978 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1979
1980 if (framesize) {
1981 __ addq(rsp, framesize);
1982 }
1983
1984 __ popq(rbp);
1985
1986 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1987 __ reserved_stack_check();
1988 }
1989
1990 if (do_polling() && C->is_method_compilation()) {
1991 Label dummy_label;
1992 Label* code_stub = &dummy_label;
1993 if (!C->output()->in_scratch_emit_size()) {
1994 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1995 C->output()->add_stub(stub);
1996 code_stub = &stub->entry();
1997 }
1998 __ relocate(relocInfo::poll_return_type);
1999 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
2000 }
2001 }
2002
// Size of the epilog in bytes. Delegates to the generic MachNode::size()
// instead of computing the size by hand.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
  // the hard way
}
2008
// Relocation count reported for the epilog (emit() issues a
// poll_return_type relocation when polling is enabled).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
2013
// The epilog has no pipeline description of its own; it reports the generic
// one obtained from MachNode::pipeline_class().
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2018
2019 //=============================================================================
2020
// Coarse location classes returned by rc_class() for an OptoReg::Name.
enum RC {
  rc_bad,    // not a valid OptoReg
  rc_int,    // VMReg for which is_Register() holds
  rc_kreg,   // VMReg for which is_KRegister() holds
  rc_float,  // VMReg for which is_XMMRegister() holds
  rc_stack   // stack location
};
2028
2029 static enum RC rc_class(OptoReg::Name reg)
2030 {
2031 if( !OptoReg::is_valid(reg) ) return rc_bad;
2032
2033 if (OptoReg::is_stack(reg)) return rc_stack;
2034
2035 VMReg r = OptoReg::as_VMReg(reg);
2036
2037 if (r->is_Register()) return rc_int;
2038
2039 if (r->is_KRegister()) return rc_kreg;
2040
2041 assert(r->is_XMMRegister(), "must be");
2042 return rc_float;
2043 }
2044
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Only their declarations appear here; MachSpillCopyNode::implementation()
// calls vec_mov_helper() for XMM register to XMM register vector copies and
// vec_spill_helper() for copies between an XMM register and the stack.
// Note that vec_mov_helper is declared static while vec_spill_helper is not.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);
2051
// Copies a vector value from one stack location to another.
//
//   masm       - when non-null, code is emitted; when null, the equivalent
//                text is printed to st instead (non-product builds only; in
//                product builds nothing happens for a null masm)
//   src_offset - rsp-relative offset of the source
//   dst_offset - rsp-relative offset of the destination
//   ireg       - ideal register kind: Op_VecS, Op_VecD, Op_VecX, Op_VecY or
//                Op_VecZ; any other value hits ShouldNotReachHere()
//   st         - output stream for the textual form
//
// Op_VecS borrows rax and Op_VecY/Op_VecZ borrow xmm0 as a temporary; the
// borrowed register is saved below rsp first and restored afterwards.
// Op_VecD/Op_VecX use pushq/popq pairs and need no temporary register.
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS:
      // 32-bit copy through rax, which is preserved at [rsp - 8].
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      // 64-bit copy with one push/pop pair.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      // 128-bit copy as two 64-bit push/pop pairs.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      // 256-bit copy through xmm0, which is preserved at [rsp - 32].
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      // 512-bit copy through xmm0, which is preserved at [rsp - 64].
      // NOTE(review): the trailing 2 is presumably the 512-bit vector length
      // encoding - confirm against Assembler::evmovdquq.
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Textual form of the sequences above.
    switch (ireg) {
    case Op_VecS:
      st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl rax, [rsp + #%d]\n\t"
                "movl [rsp + #%d], rax\n\t"
                "movq rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecX:
      st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushq [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): the text says vmovdqu although the emitter above uses
      // evmovdquq for this case.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
2129
2130 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2131 PhaseRegAlloc* ra_,
2132 bool do_size,
2133 outputStream* st) const {
2134 assert(masm != nullptr || st != nullptr, "sanity");
2135 // Get registers to move
2136 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2137 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2138 OptoReg::Name dst_second = ra_->get_reg_second(this);
2139 OptoReg::Name dst_first = ra_->get_reg_first(this);
2140
2141 enum RC src_second_rc = rc_class(src_second);
2142 enum RC src_first_rc = rc_class(src_first);
2143 enum RC dst_second_rc = rc_class(dst_second);
2144 enum RC dst_first_rc = rc_class(dst_first);
2145
2146 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2147 "must move at least 1 register" );
2148
2149 if (src_first == dst_first && src_second == dst_second) {
2150 // Self copy, no move
2151 return 0;
2152 }
2153 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
2154 uint ireg = ideal_reg();
2155 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2156 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
2157 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
2158 // mem -> mem
2159 int src_offset = ra_->reg2offset(src_first);
2160 int dst_offset = ra_->reg2offset(dst_first);
2161 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2162 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2163 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2164 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2165 int stack_offset = ra_->reg2offset(dst_first);
2166 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2167 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2168 int stack_offset = ra_->reg2offset(src_first);
2169 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2170 } else {
2171 ShouldNotReachHere();
2172 }
2173 return 0;
2174 }
2175 if (src_first_rc == rc_stack) {
2176 // mem ->
2177 if (dst_first_rc == rc_stack) {
2178 // mem -> mem
2179 assert(src_second != dst_first, "overlap");
2180 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2181 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2182 // 64-bit
2183 int src_offset = ra_->reg2offset(src_first);
2184 int dst_offset = ra_->reg2offset(dst_first);
2185 if (masm) {
2186 __ pushq(Address(rsp, src_offset));
2187 __ popq (Address(rsp, dst_offset));
2188 #ifndef PRODUCT
2189 } else {
2190 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2191 "popq [rsp + #%d]",
2192 src_offset, dst_offset);
2193 #endif
2194 }
2195 } else {
2196 // 32-bit
2197 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2198 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2199 // No pushl/popl, so:
2200 int src_offset = ra_->reg2offset(src_first);
2201 int dst_offset = ra_->reg2offset(dst_first);
2202 if (masm) {
2203 __ movq(Address(rsp, -8), rax);
2204 __ movl(rax, Address(rsp, src_offset));
2205 __ movl(Address(rsp, dst_offset), rax);
2206 __ movq(rax, Address(rsp, -8));
2207 #ifndef PRODUCT
2208 } else {
2209 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2210 "movl rax, [rsp + #%d]\n\t"
2211 "movl [rsp + #%d], rax\n\t"
2212 "movq rax, [rsp - #8]",
2213 src_offset, dst_offset);
2214 #endif
2215 }
2216 }
2217 return 0;
2218 } else if (dst_first_rc == rc_int) {
2219 // mem -> gpr
2220 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2221 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2222 // 64-bit
2223 int offset = ra_->reg2offset(src_first);
2224 if (masm) {
2225 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2226 #ifndef PRODUCT
2227 } else {
2228 st->print("movq %s, [rsp + #%d]\t# spill",
2229 Matcher::regName[dst_first],
2230 offset);
2231 #endif
2232 }
2233 } else {
2234 // 32-bit
2235 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2236 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2237 int offset = ra_->reg2offset(src_first);
2238 if (masm) {
2239 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2240 #ifndef PRODUCT
2241 } else {
2242 st->print("movl %s, [rsp + #%d]\t# spill",
2243 Matcher::regName[dst_first],
2244 offset);
2245 #endif
2246 }
2247 }
2248 return 0;
2249 } else if (dst_first_rc == rc_float) {
2250 // mem-> xmm
2251 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2252 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2253 // 64-bit
2254 int offset = ra_->reg2offset(src_first);
2255 if (masm) {
2256 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2257 #ifndef PRODUCT
2258 } else {
2259 st->print("%s %s, [rsp + #%d]\t# spill",
2260 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2261 Matcher::regName[dst_first],
2262 offset);
2263 #endif
2264 }
2265 } else {
2266 // 32-bit
2267 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2268 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2269 int offset = ra_->reg2offset(src_first);
2270 if (masm) {
2271 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2272 #ifndef PRODUCT
2273 } else {
2274 st->print("movss %s, [rsp + #%d]\t# spill",
2275 Matcher::regName[dst_first],
2276 offset);
2277 #endif
2278 }
2279 }
2280 return 0;
2281 } else if (dst_first_rc == rc_kreg) {
2282 // mem -> kreg
2283 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2284 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2285 // 64-bit
2286 int offset = ra_->reg2offset(src_first);
2287 if (masm) {
2288 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2289 #ifndef PRODUCT
2290 } else {
2291 st->print("kmovq %s, [rsp + #%d]\t# spill",
2292 Matcher::regName[dst_first],
2293 offset);
2294 #endif
2295 }
2296 }
2297 return 0;
2298 }
2299 } else if (src_first_rc == rc_int) {
2300 // gpr ->
2301 if (dst_first_rc == rc_stack) {
2302 // gpr -> mem
2303 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2304 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2305 // 64-bit
2306 int offset = ra_->reg2offset(dst_first);
2307 if (masm) {
2308 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2309 #ifndef PRODUCT
2310 } else {
2311 st->print("movq [rsp + #%d], %s\t# spill",
2312 offset,
2313 Matcher::regName[src_first]);
2314 #endif
2315 }
2316 } else {
2317 // 32-bit
2318 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2319 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2320 int offset = ra_->reg2offset(dst_first);
2321 if (masm) {
2322 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2323 #ifndef PRODUCT
2324 } else {
2325 st->print("movl [rsp + #%d], %s\t# spill",
2326 offset,
2327 Matcher::regName[src_first]);
2328 #endif
2329 }
2330 }
2331 return 0;
2332 } else if (dst_first_rc == rc_int) {
2333 // gpr -> gpr
2334 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2335 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2336 // 64-bit
2337 if (masm) {
2338 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2339 as_Register(Matcher::_regEncode[src_first]));
2340 #ifndef PRODUCT
2341 } else {
2342 st->print("movq %s, %s\t# spill",
2343 Matcher::regName[dst_first],
2344 Matcher::regName[src_first]);
2345 #endif
2346 }
2347 return 0;
2348 } else {
2349 // 32-bit
2350 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2351 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2352 if (masm) {
2353 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2354 as_Register(Matcher::_regEncode[src_first]));
2355 #ifndef PRODUCT
2356 } else {
2357 st->print("movl %s, %s\t# spill",
2358 Matcher::regName[dst_first],
2359 Matcher::regName[src_first]);
2360 #endif
2361 }
2362 return 0;
2363 }
2364 } else if (dst_first_rc == rc_float) {
2365 // gpr -> xmm
2366 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2367 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2368 // 64-bit
2369 if (masm) {
2370 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2371 #ifndef PRODUCT
2372 } else {
2373 st->print("movdq %s, %s\t# spill",
2374 Matcher::regName[dst_first],
2375 Matcher::regName[src_first]);
2376 #endif
2377 }
2378 } else {
2379 // 32-bit
2380 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2381 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2382 if (masm) {
2383 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2384 #ifndef PRODUCT
2385 } else {
2386 st->print("movdl %s, %s\t# spill",
2387 Matcher::regName[dst_first],
2388 Matcher::regName[src_first]);
2389 #endif
2390 }
2391 }
2392 return 0;
2393 } else if (dst_first_rc == rc_kreg) {
2394 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2395 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2396 // 64-bit
2397 if (masm) {
2398 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2399 #ifndef PRODUCT
2400 } else {
2401 st->print("kmovq %s, %s\t# spill",
2402 Matcher::regName[dst_first],
2403 Matcher::regName[src_first]);
2404 #endif
2405 }
2406 }
2407 Unimplemented();
2408 return 0;
2409 }
2410 } else if (src_first_rc == rc_float) {
2411 // xmm ->
2412 if (dst_first_rc == rc_stack) {
2413 // xmm -> mem
2414 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2415 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2416 // 64-bit
2417 int offset = ra_->reg2offset(dst_first);
2418 if (masm) {
2419 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2420 #ifndef PRODUCT
2421 } else {
2422 st->print("movsd [rsp + #%d], %s\t# spill",
2423 offset,
2424 Matcher::regName[src_first]);
2425 #endif
2426 }
2427 } else {
2428 // 32-bit
2429 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2430 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2431 int offset = ra_->reg2offset(dst_first);
2432 if (masm) {
2433 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2434 #ifndef PRODUCT
2435 } else {
2436 st->print("movss [rsp + #%d], %s\t# spill",
2437 offset,
2438 Matcher::regName[src_first]);
2439 #endif
2440 }
2441 }
2442 return 0;
2443 } else if (dst_first_rc == rc_int) {
2444 // xmm -> gpr
2445 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2446 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2447 // 64-bit
2448 if (masm) {
2449 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2450 #ifndef PRODUCT
2451 } else {
2452 st->print("movdq %s, %s\t# spill",
2453 Matcher::regName[dst_first],
2454 Matcher::regName[src_first]);
2455 #endif
2456 }
2457 } else {
2458 // 32-bit
2459 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2460 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2461 if (masm) {
2462 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2463 #ifndef PRODUCT
2464 } else {
2465 st->print("movdl %s, %s\t# spill",
2466 Matcher::regName[dst_first],
2467 Matcher::regName[src_first]);
2468 #endif
2469 }
2470 }
2471 return 0;
2472 } else if (dst_first_rc == rc_float) {
2473 // xmm -> xmm
2474 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2475 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2476 // 64-bit
2477 if (masm) {
2478 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2479 #ifndef PRODUCT
2480 } else {
2481 st->print("%s %s, %s\t# spill",
2482 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2483 Matcher::regName[dst_first],
2484 Matcher::regName[src_first]);
2485 #endif
2486 }
2487 } else {
2488 // 32-bit
2489 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2490 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2491 if (masm) {
2492 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2493 #ifndef PRODUCT
2494 } else {
2495 st->print("%s %s, %s\t# spill",
2496 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2497 Matcher::regName[dst_first],
2498 Matcher::regName[src_first]);
2499 #endif
2500 }
2501 }
2502 return 0;
2503 } else if (dst_first_rc == rc_kreg) {
2504 assert(false, "Illegal spilling");
2505 return 0;
2506 }
2507 } else if (src_first_rc == rc_kreg) {
2508 if (dst_first_rc == rc_stack) {
2509 // mem -> kreg
2510 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2511 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2512 // 64-bit
2513 int offset = ra_->reg2offset(dst_first);
2514 if (masm) {
2515 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2516 #ifndef PRODUCT
2517 } else {
2518 st->print("kmovq [rsp + #%d] , %s\t# spill",
2519 offset,
2520 Matcher::regName[src_first]);
2521 #endif
2522 }
2523 }
2524 return 0;
2525 } else if (dst_first_rc == rc_int) {
2526 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2527 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2528 // 64-bit
2529 if (masm) {
2530 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2531 #ifndef PRODUCT
2532 } else {
2533 st->print("kmovq %s, %s\t# spill",
2534 Matcher::regName[dst_first],
2535 Matcher::regName[src_first]);
2536 #endif
2537 }
2538 }
2539 Unimplemented();
2540 return 0;
2541 } else if (dst_first_rc == rc_kreg) {
2542 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2543 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2544 // 64-bit
2545 if (masm) {
2546 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2547 #ifndef PRODUCT
2548 } else {
2549 st->print("kmovq %s, %s\t# spill",
2550 Matcher::regName[dst_first],
2551 Matcher::regName[src_first]);
2552 #endif
2553 }
2554 }
2555 return 0;
2556 } else if (dst_first_rc == rc_float) {
2557 assert(false, "Illegal spill");
2558 return 0;
2559 }
2560 }
2561
2562 assert(0," foo ");
2563 Unimplemented();
2564 return 0;
2565 }
2566
2567 #ifndef PRODUCT
2568 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2569 implementation(nullptr, ra_, false, st);
2570 }
2571 #endif
2572
2573 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2574 implementation(masm, ra_, false, nullptr);
2575 }
2576
2577 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2578 return MachNode::size(ra_);
2579 }
2580
2581 //=============================================================================
2582 #ifndef PRODUCT
2583 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2584 {
2585 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2586 int reg = ra_->get_reg_first(this);
2587 st->print("leaq %s, [rsp + #%d]\t# box lock",
2588 Matcher::regName[reg], offset);
2589 }
2590 #endif
2591
2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2593 {
2594 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2595 int reg = ra_->get_encode(this);
2596
2597 __ lea(as_Register(reg), Address(rsp, offset));
2598 }
2599
2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2601 {
2602 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2603 if (ra_->get_encode(this) > 15) {
2604 return (offset < 0x80) ? 6 : 9; // REX2
2605 } else {
2606 return (offset < 0x80) ? 5 : 8; // REX
2607 }
2608 }
2609
2610 //=============================================================================
2611 #ifndef PRODUCT
2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2613 {
2614 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2615 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2616 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2617 }
2618 #endif
2619
2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2621 {
2622 __ ic_check(InteriorEntryAlignment);
2623 }
2624
2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2626 {
2627 return MachNode::size(ra_); // too many variables; just compute it
2628 // the hard way
2629 }
2630
2631
2632 //=============================================================================
2633
2634 bool Matcher::supports_vector_calling_convention(void) {
2635 return EnableVectorSupport;
2636 }
2637
2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2639 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2640 }
2641
2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2643 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2644 }
2645
2646 #ifdef ASSERT
2647 static bool is_ndd_demotable(const MachNode* mdef) {
2648 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2649 }
2650 #endif
2651
2652 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2653 int oper_index) {
2654 if (mdef == nullptr) {
2655 return false;
2656 }
2657
2658 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2659 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2660 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2661 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2662 return false;
2663 }
2664
2665 // Complex memory operand covers multiple incoming edges needed for
2666 // address computation. Biasing def towards any address component will not
2667 // result in NDD demotion by assembler.
2668 if (mdef->operand_num_edges(oper_index) != 1) {
2669 return false;
2670 }
2671
2672 // Demotion candidate must be register mask compatible with definition.
2673 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2674 if (!oper_mask.overlap(mdef->out_RegMask())) {
2675 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2676 return false;
2677 }
2678
2679 switch (oper_index) {
2680 // First operand of MachNode corresponding to Intel APX NDD selection
2681 // pattern can share its assigned register with definition operand if
2682 // their live ranges do not overlap. In such a scenario we can demote
2683 // it to legacy map0/map1 instruction by replacing its 4-byte extended
2684 // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
2685 // are decorated with a special flag by instruction selector.
2686 case 1:
2687 return is_ndd_demotable_opr1(mdef);
2688
2689 // Definition operand of commutative operation can be biased towards second
2690 // operand.
2691 case 2:
2692 return is_ndd_demotable_opr2(mdef);
2693
2694 // Current scheme only selects up to two biasing candidates
2695 default:
2696 assert(false, "unhandled operand index: %s", mdef->Name());
2697 break;
2698 }
2699
2700 return false;
2701 }
2702
2703 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2704 assert(EnableVectorSupport, "sanity");
2705 int lo = XMM0_num;
2706 int hi = XMM0b_num;
2707 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2708 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2709 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2710 return OptoRegPair(hi, lo);
2711 }
2712
2713 // Is this branch offset short enough that a short branch can be used?
2714 //
2715 // NOTE: If the platform does not provide any short branch variants, then
2716 // this method should return false for offset 0.
2717 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2718 // The passed offset is relative to address of the branch.
2719 // On 86 a branch displacement is calculated relative to address
2720 // of a next instruction.
2721 offset -= br_size;
2722
2723 // the short version of jmpConUCF2 contains multiple branches,
2724 // making the reach slightly less
2725 if (rule == jmpConUCF2_rule)
2726 return (-126 <= offset && offset <= 125);
2727 return (-128 <= offset && offset <= 127);
2728 }
2729
2730 #ifdef ASSERT
2731 // Return whether or not this register is ever used as an argument.
2732 bool Matcher::can_be_java_arg(int reg)
2733 {
2734 return
2735 reg == RDI_num || reg == RDI_H_num ||
2736 reg == RSI_num || reg == RSI_H_num ||
2737 reg == RDX_num || reg == RDX_H_num ||
2738 reg == RCX_num || reg == RCX_H_num ||
2739 reg == R8_num || reg == R8_H_num ||
2740 reg == R9_num || reg == R9_H_num ||
2741 reg == R12_num || reg == R12_H_num ||
2742 reg == XMM0_num || reg == XMM0b_num ||
2743 reg == XMM1_num || reg == XMM1b_num ||
2744 reg == XMM2_num || reg == XMM2b_num ||
2745 reg == XMM3_num || reg == XMM3b_num ||
2746 reg == XMM4_num || reg == XMM4b_num ||
2747 reg == XMM5_num || reg == XMM5b_num ||
2748 reg == XMM6_num || reg == XMM6b_num ||
2749 reg == XMM7_num || reg == XMM7b_num;
2750 }
2751 #endif
2752
2753 uint Matcher::int_pressure_limit()
2754 {
2755 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2756 }
2757
2758 uint Matcher::float_pressure_limit()
2759 {
2760 // After experiment around with different values, the following default threshold
2761 // works best for LCM's register pressure scheduling on x64.
2762 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2763 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2764 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2765 }
2766
2767 // Register for DIVI projection of divmodI
2768 const RegMask& Matcher::divI_proj_mask() {
2769 return INT_RAX_REG_mask();
2770 }
2771
2772 // Register for MODI projection of divmodI
2773 const RegMask& Matcher::modI_proj_mask() {
2774 return INT_RDX_REG_mask();
2775 }
2776
2777 // Register for DIVL projection of divmodL
2778 const RegMask& Matcher::divL_proj_mask() {
2779 return LONG_RAX_REG_mask();
2780 }
2781
2782 // Register for MODL projection of divmodL
2783 const RegMask& Matcher::modL_proj_mask() {
2784 return LONG_RDX_REG_mask();
2785 }
2786
2787 %}
2788
2789 source_hpp %{
2790 // Header information of the source block.
2791 // Method declarations/definitions which are used outside
2792 // the ad-scope can conveniently be defined here.
2793 //
2794 // To keep related declarations/definitions/uses close together,
2795 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2796
2797 #include "runtime/vm_version.hpp"
2798
2799 class NativeJump;
2800
2801 class CallStubImpl {
2802
2803 //--------------------------------------------------------------
2804 //---< Used for optimization in Compile::shorten_branches >---
2805 //--------------------------------------------------------------
2806
2807 public:
2808 // Size of call trampoline stub.
2809 static uint size_call_trampoline() {
2810 return 0; // no call trampolines on this platform
2811 }
2812
2813 // number of relocations needed by a call trampoline stub
2814 static uint reloc_call_trampoline() {
2815 return 0; // no call trampolines on this platform
2816 }
2817 };
2818
2819 class HandlerImpl {
2820
2821 public:
2822
2823 static int emit_deopt_handler(C2_MacroAssembler* masm);
2824
2825 static uint size_deopt_handler() {
2826 // one call and one jmp.
2827 return 7;
2828 }
2829 };
2830
2831 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2832 switch(bytes) {
2833 case 4: // fall-through
2834 case 8: // fall-through
2835 case 16: return Assembler::AVX_128bit;
2836 case 32: return Assembler::AVX_256bit;
2837 case 64: return Assembler::AVX_512bit;
2838
2839 default: {
2840 ShouldNotReachHere();
2841 return Assembler::AVX_NoVec;
2842 }
2843 }
2844 }
2845
2846 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2847 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2848 }
2849
2850 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2851 uint def_idx = use->operand_index(opnd);
2852 Node* def = use->in(def_idx);
2853 return vector_length_encoding(def);
2854 }
2855
2856 static inline bool is_vector_popcount_predicate(BasicType bt) {
2857 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2858 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2859 }
2860
2861 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2862 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2863 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2864 }
2865
2866 class Node::PD {
2867 public:
2868 enum NodeFlags : uint64_t {
2869 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2870 Flag_sets_carry_flag = Node::_last_flag << 2,
2871 Flag_sets_parity_flag = Node::_last_flag << 3,
2872 Flag_sets_zero_flag = Node::_last_flag << 4,
2873 Flag_sets_overflow_flag = Node::_last_flag << 5,
2874 Flag_sets_sign_flag = Node::_last_flag << 6,
2875 Flag_clears_carry_flag = Node::_last_flag << 7,
2876 Flag_clears_parity_flag = Node::_last_flag << 8,
2877 Flag_clears_zero_flag = Node::_last_flag << 9,
2878 Flag_clears_overflow_flag = Node::_last_flag << 10,
2879 Flag_clears_sign_flag = Node::_last_flag << 11,
2880 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2881 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2882 _last_flag = Flag_ndd_demotable_opr2
2883 };
2884 };
2885
2886 %} // end source_hpp
2887
2888 source %{
2889
2890 #include "opto/addnode.hpp"
2891 #include "c2_intelJccErratum_x86.hpp"
2892
2893 void PhaseOutput::pd_perform_mach_node_analysis() {
2894 if (VM_Version::has_intel_jcc_erratum()) {
2895 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2896 _buf_sizes._code += extra_padding;
2897 }
2898 }
2899
2900 int MachNode::pd_alignment_required() const {
2901 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2902 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2903 return IntelJccErratum::largest_jcc_size() + 1;
2904 } else {
2905 return 1;
2906 }
2907 }
2908
2909 int MachNode::compute_padding(int current_offset) const {
2910 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2911 Compile* C = Compile::current();
2912 PhaseOutput* output = C->output();
2913 Block* block = output->block();
2914 int index = output->index();
2915 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2916 } else {
2917 return 0;
2918 }
2919 }
2920
2921 // Emit deopt handler code.
2922 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2923
2924 // Note that the code buffer's insts_mark is always relative to insts.
2925 // That's why we must use the macroassembler to generate a handler.
2926 address base = __ start_a_stub(size_deopt_handler());
2927 if (base == nullptr) {
2928 ciEnv::current()->record_failure("CodeCache is full");
2929 return 0; // CodeBuffer::expand failed
2930 }
2931 int offset = __ offset();
2932
2933 Label start;
2934 __ bind(start);
2935
2936 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2937
2938 int entry_offset = __ offset();
2939
2940 __ jmp(start);
2941
2942 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2943 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2944 "out of bounds read in post-call NOP check");
2945 __ end_a_stub();
2946 return entry_offset;
2947 }
2948
2949 static Assembler::Width widthForType(BasicType bt) {
2950 if (bt == T_BYTE) {
2951 return Assembler::B;
2952 } else if (bt == T_SHORT) {
2953 return Assembler::W;
2954 } else if (bt == T_INT) {
2955 return Assembler::D;
2956 } else {
2957 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2958 return Assembler::Q;
2959 }
2960 }
2961
2962 //=============================================================================
2963
2964 // Float masks come from different places depending on platform.
2965 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2966 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2967 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2968 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2969 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2970 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2971 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2972 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2973 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2974 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2975 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2976 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2977 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2978 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2979 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2980 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2981 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2982 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2983 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2984
2985 //=============================================================================
2986 bool Matcher::match_rule_supported(int opcode) {
2987 if (!has_match_rule(opcode)) {
2988 return false; // no match rule present
2989 }
2990 switch (opcode) {
2991 case Op_AbsVL:
2992 case Op_StoreVectorScatter:
2993 if (UseAVX < 3) {
2994 return false;
2995 }
2996 break;
2997 case Op_PopCountI:
2998 case Op_PopCountL:
2999 if (!UsePopCountInstruction) {
3000 return false;
3001 }
3002 break;
3003 case Op_PopCountVI:
3004 if (UseAVX < 2) {
3005 return false;
3006 }
3007 break;
3008 case Op_CompressV:
3009 case Op_ExpandV:
3010 case Op_PopCountVL:
3011 if (UseAVX < 2) {
3012 return false;
3013 }
3014 break;
3015 case Op_MulVI:
3016 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3017 return false;
3018 }
3019 break;
3020 case Op_MulVL:
3021 if (UseSSE < 4) { // only with SSE4_1 or AVX
3022 return false;
3023 }
3024 break;
3025 case Op_MulReductionVL:
3026 if (VM_Version::supports_avx512dq() == false) {
3027 return false;
3028 }
3029 break;
3030 case Op_AbsVB:
3031 case Op_AbsVS:
3032 case Op_AbsVI:
3033 case Op_AddReductionVI:
3034 case Op_AndReductionV:
3035 case Op_OrReductionV:
3036 case Op_XorReductionV:
3037 if (UseSSE < 3) { // requires at least SSSE3
3038 return false;
3039 }
3040 break;
3041 case Op_MaxHF:
3042 case Op_MinHF:
3043 if (!VM_Version::supports_avx512vlbw()) {
3044 return false;
3045 } // fallthrough
3046 case Op_AddHF:
3047 case Op_DivHF:
3048 case Op_FmaHF:
3049 case Op_MulHF:
3050 case Op_ReinterpretS2HF:
3051 case Op_ReinterpretHF2S:
3052 case Op_SubHF:
3053 case Op_SqrtHF:
3054 if (!VM_Version::supports_avx512_fp16()) {
3055 return false;
3056 }
3057 break;
3058 case Op_VectorLoadShuffle:
3059 case Op_VectorRearrange:
3060 case Op_MulReductionVI:
3061 if (UseSSE < 4) { // requires at least SSE4
3062 return false;
3063 }
3064 break;
3065 case Op_IsInfiniteF:
3066 case Op_IsInfiniteD:
3067 if (!VM_Version::supports_avx512dq()) {
3068 return false;
3069 }
3070 break;
3071 case Op_SqrtVD:
3072 case Op_SqrtVF:
3073 case Op_VectorMaskCmp:
3074 case Op_VectorCastB2X:
3075 case Op_VectorCastS2X:
3076 case Op_VectorCastI2X:
3077 case Op_VectorCastL2X:
3078 case Op_VectorCastF2X:
3079 case Op_VectorCastD2X:
3080 case Op_VectorUCastB2X:
3081 case Op_VectorUCastS2X:
3082 case Op_VectorUCastI2X:
3083 case Op_VectorMaskCast:
3084 if (UseAVX < 1) { // enabled for AVX only
3085 return false;
3086 }
3087 break;
3088 case Op_PopulateIndex:
3089 if (UseAVX < 2) {
3090 return false;
3091 }
3092 break;
3093 case Op_RoundVF:
3094 if (UseAVX < 2) { // enabled for AVX2 only
3095 return false;
3096 }
3097 break;
3098 case Op_RoundVD:
3099 if (UseAVX < 3) {
3100 return false; // enabled for AVX3 only
3101 }
3102 break;
3103 case Op_CompareAndSwapL:
3104 case Op_CompareAndSwapP:
3105 break;
3106 case Op_StrIndexOf:
3107 if (!UseSSE42Intrinsics) {
3108 return false;
3109 }
3110 break;
3111 case Op_StrIndexOfChar:
3112 if (!UseSSE42Intrinsics) {
3113 return false;
3114 }
3115 break;
3116 case Op_OnSpinWait:
3117 if (VM_Version::supports_on_spin_wait() == false) {
3118 return false;
3119 }
3120 break;
3121 case Op_MulVB:
3122 case Op_LShiftVB:
3123 case Op_RShiftVB:
3124 case Op_URShiftVB:
3125 case Op_VectorInsert:
3126 case Op_VectorLoadMask:
3127 case Op_VectorStoreMask:
3128 case Op_VectorBlend:
3129 if (UseSSE < 4) {
3130 return false;
3131 }
3132 break;
3133 case Op_MaxD:
3134 case Op_MaxF:
3135 case Op_MinD:
3136 case Op_MinF:
3137 if (UseAVX < 1) { // enabled for AVX only
3138 return false;
3139 }
3140 break;
3141 case Op_CacheWB:
3142 case Op_CacheWBPreSync:
3143 case Op_CacheWBPostSync:
3144 if (!VM_Version::supports_data_cache_line_flush()) {
3145 return false;
3146 }
3147 break;
3148 case Op_ExtractB:
3149 case Op_ExtractL:
3150 case Op_ExtractI:
3151 case Op_RoundDoubleMode:
3152 if (UseSSE < 4) {
3153 return false;
3154 }
3155 break;
3156 case Op_RoundDoubleModeV:
3157 if (VM_Version::supports_avx() == false) {
3158 return false; // 128bit vroundpd is not available
3159 }
3160 break;
3161 case Op_LoadVectorGather:
3162 case Op_LoadVectorGatherMasked:
3163 if (UseAVX < 2) {
3164 return false;
3165 }
3166 break;
3167 case Op_FmaF:
3168 case Op_FmaD:
3169 case Op_FmaVD:
3170 case Op_FmaVF:
3171 if (!UseFMA) {
3172 return false;
3173 }
3174 break;
3175 case Op_MacroLogicV:
3176 if (UseAVX < 3 || !UseVectorMacroLogic) {
3177 return false;
3178 }
3179 break;
3180
3181 case Op_VectorCmpMasked:
3182 case Op_VectorMaskGen:
3183 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3184 return false;
3185 }
3186 break;
3187 case Op_VectorMaskFirstTrue:
3188 case Op_VectorMaskLastTrue:
3189 case Op_VectorMaskTrueCount:
3190 case Op_VectorMaskToLong:
3191 if (UseAVX < 1) {
3192 return false;
3193 }
3194 break;
3195 case Op_RoundF:
3196 case Op_RoundD:
3197 break;
3198 case Op_CopySignD:
3199 case Op_CopySignF:
3200 if (UseAVX < 3) {
3201 return false;
3202 }
3203 if (!VM_Version::supports_avx512vl()) {
3204 return false;
3205 }
3206 break;
3207 case Op_CompressBits:
3208 case Op_ExpandBits:
3209 if (!VM_Version::supports_bmi2()) {
3210 return false;
3211 }
3212 break;
3213 case Op_CompressM:
3214 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3215 return false;
3216 }
3217 break;
3218 case Op_ConvF2HF:
3219 case Op_ConvHF2F:
3220 if (!VM_Version::supports_float16()) {
3221 return false;
3222 }
3223 break;
3224 case Op_VectorCastF2HF:
3225 case Op_VectorCastHF2F:
3226 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3227 return false;
3228 }
3229 break;
3230 }
3231 return true; // Match rules are supported by default.
3232 }
3233
3234 //------------------------------------------------------------------------
3235
3236 static inline bool is_pop_count_instr_target(BasicType bt) {
3237 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3238 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3239 }
3240
3241 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3242 return match_rule_supported_vector(opcode, vlen, bt);
3243 }
3244
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
//
// Returns false if match_rule_supported(opcode) rejects the opcode, if
// vector_size_supported(bt, vlen) rejects the shape, or if one of the per-opcode
// checks in the switch below fails. Returns true in every other case.
bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  //   * SSE2 supports 128bit vectors for all types;
  //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  //   * AVX2 supports 256bit vectors for all types;
  //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  //   * implementation limitations
  //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
  //   * 128bit vroundpd instruction is present only in AVX1
  // Total vector width in bits: vlen elements of type bt.
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  switch (opcode) {
    case Op_MaxVHF:
    case Op_MinVHF:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      // NOTE(review): there is no break here, so MaxVHF/MinVHF continue into the
      // checks shared with the other half-float vector ops below. Presumably an
      // intentional fallthrough -- confirm.
    case Op_AddVHF:
    case Op_DivVHF:
    case Op_FmaVHF:
    case Op_MulVHF:
    case Op_SubVHF:
    case Op_SqrtVHF:
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_AbsVF:
    case Op_NegVF:
      if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vandps and vxorps are not available
      }
      break;
    case Op_AbsVD:
    case Op_NegVD:
      if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vpmullq, vandpd and vxorpd are not available
      }
      break;
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false;
      } // fallthrough
    case Op_MacroLogicV:
      if (!VM_Version::supports_evex() ||
          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
        return false;
      }
      break;
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
        return false;
      }
      break;
    case Op_UMinV:
    case Op_UMaxV:
      if (UseAVX == 0) {
        return false;
      }
      break;
    case Op_UMinReductionV:
    case Op_UMaxReductionV:
      if (UseAVX == 0) {
        return false;
      }
      if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_MaxV:
    case Op_MinV:
      if (UseSSE < 4 && is_integral_type(bt)) {
        return false;
      }
      if ((bt == T_FLOAT || bt == T_DOUBLE)) {
          // Float/Double intrinsics are enabled for AVX family currently.
          if (UseAVX == 0) {
            return false;
          }
          if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
            return false;
          }
      }
      break;
    case Op_CallLeafVector:
      if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
        return false;
      }
      break;
    case Op_AddReductionVI:
      if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
        return false;
      }
      // fallthrough
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (is_subword_type(bt) && (UseSSE < 4)) {
        return false;
      }
      break;
    case Op_MinReductionV:
    case Op_MaxReductionV:
      if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
        return false;
      } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
        return false;
      }
      // Float/Double intrinsics enabled for AVX family.
      if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
        return false;
      }
      if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
        return false;
      }
      break;
    case Op_VectorBlend:
      if (UseAVX == 0 && size_in_bits < 128) {
        return false;
      }
      break;
    case Op_VectorTest:
      if (UseSSE < 4) {
        return false; // Implementation limitation
      } else if (size_in_bits < 32) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if(vlen == 2) {
        return false; // Implementation limitation due to how shuffle is loaded
      } else if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadMask:
    case Op_VectorMaskCast:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_VectorStoreMask:
      if (vlen == 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_PopulateIndex:
      if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
      if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastL2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X: {
        // As per JLS section 5.1.3 narrowing conversion to sub-word types
        // happen after intermediate conversion to integer and special handling
        // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
        // The source width is computed from T_FLOAT rather than from bt.
        int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
        if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
          return false;
        }
      }
      // fallthrough
    case Op_VectorCastD2X:
      if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() &&
         ((!VM_Version::supports_evex() ||
         ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
        return false;
      }
      break;
    case Op_RoundVD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_LoadVectorGatherMasked:
      if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (is_subword_type(bt) &&
         ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
          (size_in_bits < 64) ||
          (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
        return false;
      }
      break;
    case Op_StoreVectorScatterMasked:
    case Op_StoreVectorScatter:
      if (is_subword_type(bt)) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (!is_subword_type(bt) && size_in_bits == 64) {
        return false;
      }
      if (is_subword_type(bt) && size_in_bits < 64) {
        return false;
      }
      break;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (UseAVX < 1) {
        return false; // Implementation limitation
      }
      if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
    case Op_MaskAll:
      if (!VM_Version::supports_evex()) {
        return false;
      }
      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
    case Op_CompressM:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
        return false;
      }
      if (size_in_bits < 128 ) {
        return false;
      }
      // NOTE(review): there is no break here, so CompressV/ExpandV also run the
      // VectorLongToMask checks below. Presumably an intentional fallthrough --
      // confirm.
    case Op_VectorLongToMask:
      if (UseAVX < 1) {
        return false;
      }
      if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SignumVD:
    case Op_SignumVF:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL: {
        // Only 512bit vectors without a dedicated popcount instruction for bt
        // additionally require AVX512BW.
        if (!is_pop_count_instr_target(bt) &&
            (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
          return false;
        }
      }
      break;
    case Op_ReverseV:
    case Op_ReverseBytesV:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
  }
  return true; // Per default match rules are supported.
}
3592
3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3594 // ADLC based match_rule_supported routine checks for the existence of pattern based
3595 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
3596 // of their non-masked counterpart with mask edge being the differentiator.
3597 // This routine does a strict check on the existence of masked operation patterns
3598 // by returning a default false value for all the other opcodes apart from the
3599 // ones whose masked instruction patterns are defined in this file.
3600 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3601 return false;
3602 }
3603
3604 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3605 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3606 return false;
3607 }
3608 switch(opcode) {
3609 // Unary masked operations
3610 case Op_AbsVB:
3611 case Op_AbsVS:
3612 if(!VM_Version::supports_avx512bw()) {
3613 return false; // Implementation limitation
3614 }
3615 case Op_AbsVI:
3616 case Op_AbsVL:
3617 return true;
3618
3619 // Ternary masked operations
3620 case Op_FmaVF:
3621 case Op_FmaVD:
3622 return true;
3623
3624 case Op_MacroLogicV:
3625 if(bt != T_INT && bt != T_LONG) {
3626 return false;
3627 }
3628 return true;
3629
3630 // Binary masked operations
3631 case Op_AddVB:
3632 case Op_AddVS:
3633 case Op_SubVB:
3634 case Op_SubVS:
3635 case Op_MulVS:
3636 case Op_LShiftVS:
3637 case Op_RShiftVS:
3638 case Op_URShiftVS:
3639 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3640 if (!VM_Version::supports_avx512bw()) {
3641 return false; // Implementation limitation
3642 }
3643 return true;
3644
3645 case Op_MulVL:
3646 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3647 if (!VM_Version::supports_avx512dq()) {
3648 return false; // Implementation limitation
3649 }
3650 return true;
3651
3652 case Op_AndV:
3653 case Op_OrV:
3654 case Op_XorV:
3655 case Op_RotateRightV:
3656 case Op_RotateLeftV:
3657 if (bt != T_INT && bt != T_LONG) {
3658 return false; // Implementation limitation
3659 }
3660 return true;
3661
3662 case Op_VectorLoadMask:
3663 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3664 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3665 return false;
3666 }
3667 return true;
3668
3669 case Op_AddVI:
3670 case Op_AddVL:
3671 case Op_AddVF:
3672 case Op_AddVD:
3673 case Op_SubVI:
3674 case Op_SubVL:
3675 case Op_SubVF:
3676 case Op_SubVD:
3677 case Op_MulVI:
3678 case Op_MulVF:
3679 case Op_MulVD:
3680 case Op_DivVF:
3681 case Op_DivVD:
3682 case Op_SqrtVF:
3683 case Op_SqrtVD:
3684 case Op_LShiftVI:
3685 case Op_LShiftVL:
3686 case Op_RShiftVI:
3687 case Op_RShiftVL:
3688 case Op_URShiftVI:
3689 case Op_URShiftVL:
3690 case Op_LoadVectorMasked:
3691 case Op_StoreVectorMasked:
3692 case Op_LoadVectorGatherMasked:
3693 case Op_StoreVectorScatterMasked:
3694 return true;
3695
3696 case Op_UMinV:
3697 case Op_UMaxV:
3698 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3699 return false;
3700 } // fallthrough
3701 case Op_MaxV:
3702 case Op_MinV:
3703 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3704 return false; // Implementation limitation
3705 }
3706 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3707 return false; // Implementation limitation
3708 }
3709 return true;
3710 case Op_SaturatingAddV:
3711 case Op_SaturatingSubV:
3712 if (!is_subword_type(bt)) {
3713 return false;
3714 }
3715 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3716 return false; // Implementation limitation
3717 }
3718 return true;
3719
3720 case Op_VectorMaskCmp:
3721 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3722 return false; // Implementation limitation
3723 }
3724 return true;
3725
3726 case Op_VectorRearrange:
3727 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3728 return false; // Implementation limitation
3729 }
3730 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3731 return false; // Implementation limitation
3732 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3733 return false; // Implementation limitation
3734 }
3735 return true;
3736
3737 // Binary Logical operations
3738 case Op_AndVMask:
3739 case Op_OrVMask:
3740 case Op_XorVMask:
3741 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3742 return false; // Implementation limitation
3743 }
3744 return true;
3745
3746 case Op_PopCountVI:
3747 case Op_PopCountVL:
3748 if (!is_pop_count_instr_target(bt)) {
3749 return false;
3750 }
3751 return true;
3752
3753 case Op_MaskAll:
3754 return true;
3755
3756 case Op_CountLeadingZerosV:
3757 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3758 return true;
3759 }
3760 default:
3761 return false;
3762 }
3763 }
3764
// Always returns false here: neither 'node' nor 'vt' is inspected, so no
// vector node is ever reported as needing partial operations.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}
3768
3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3771 switch (elem_bt) {
3772 case T_BYTE: return false;
3773 case T_SHORT: return !VM_Version::supports_avx512bw();
3774 case T_INT: return !VM_Version::supports_avx();
3775 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3776 default:
3777 ShouldNotReachHere();
3778 return false;
3779 }
3780 }
3781
3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3783 // Prefer predicate if the mask type is "TypePVectMask".
3784 return vt->isa_pvectmask() != nullptr;
3785 }
3786
3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3788 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3789 bool legacy = (generic_opnd->opcode() == LEGVEC);
3790 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3791 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3792 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3793 return new legVecZOper();
3794 }
3795 if (legacy) {
3796 switch (ideal_reg) {
3797 case Op_VecS: return new legVecSOper();
3798 case Op_VecD: return new legVecDOper();
3799 case Op_VecX: return new legVecXOper();
3800 case Op_VecY: return new legVecYOper();
3801 case Op_VecZ: return new legVecZOper();
3802 }
3803 } else {
3804 switch (ideal_reg) {
3805 case Op_VecS: return new vecSOper();
3806 case Op_VecD: return new vecDOper();
3807 case Op_VecX: return new vecXOper();
3808 case Op_VecY: return new vecYOper();
3809 case Op_VecZ: return new vecZOper();
3810 }
3811 }
3812 ShouldNotReachHere();
3813 return nullptr;
3814 }
3815
3816 bool Matcher::is_reg2reg_move(MachNode* m) {
3817 switch (m->rule()) {
3818 case MoveVec2Leg_rule:
3819 case MoveLeg2Vec_rule:
3820 case MoveF2VL_rule:
3821 case MoveF2LEG_rule:
3822 case MoveVL2F_rule:
3823 case MoveLEG2F_rule:
3824 case MoveD2VL_rule:
3825 case MoveD2LEG_rule:
3826 case MoveVL2D_rule:
3827 case MoveLEG2D_rule:
3828 return true;
3829 default:
3830 return false;
3831 }
3832 }
3833
3834 bool Matcher::is_generic_vector(MachOper* opnd) {
3835 switch (opnd->opcode()) {
3836 case VEC:
3837 case LEGVEC:
3838 return true;
3839 default:
3840 return false;
3841 }
3842 }
3843
3844 //------------------------------------------------------------------------
3845
3846 const RegMask* Matcher::predicate_reg_mask(void) {
3847 return &_VECTMASK_REG_mask;
3848 }
3849
3850 // Max vector size in bytes. 0 if not supported.
3851 int Matcher::vector_width_in_bytes(BasicType bt) {
3852 assert(is_java_primitive(bt), "only primitive type vectors");
3853 // SSE2 supports 128bit vectors for all types.
3854 // AVX2 supports 256bit vectors for all types.
3855 // AVX2/EVEX supports 512bit vectors for all types.
3856 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3857 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3858 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3859 size = (UseAVX > 2) ? 64 : 32;
3860 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3861 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3862 // Use flag to limit vector size.
3863 size = MIN2(size,(int)MaxVectorSize);
3864 // Minimum 2 values in vector (or 4 for bytes).
3865 switch (bt) {
3866 case T_DOUBLE:
3867 case T_LONG:
3868 if (size < 16) return 0;
3869 break;
3870 case T_FLOAT:
3871 case T_INT:
3872 if (size < 8) return 0;
3873 break;
3874 case T_BOOLEAN:
3875 if (size < 4) return 0;
3876 break;
3877 case T_CHAR:
3878 if (size < 4) return 0;
3879 break;
3880 case T_BYTE:
3881 if (size < 4) return 0;
3882 break;
3883 case T_SHORT:
3884 if (size < 4) return 0;
3885 break;
3886 default:
3887 ShouldNotReachHere();
3888 }
3889 return size;
3890 }
3891
3892 // Limits on vector size (number of elements) loaded into vector.
3893 int Matcher::max_vector_size(const BasicType bt) {
3894 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3895 }
3896 int Matcher::min_vector_size(const BasicType bt) {
3897 int max_size = max_vector_size(bt);
3898 // Min size which can be loaded into vector is 4 bytes.
3899 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3900 // Support for calling svml double64 vectors
3901 if (bt == T_DOUBLE) {
3902 size = 1;
3903 }
3904 return MIN2(size,max_size);
3905 }
3906
3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3908 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3909 // by default on Cascade Lake
3910 if (VM_Version::is_default_intel_cascade_lake()) {
3911 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3912 }
3913 return Matcher::max_vector_size(bt);
3914 }
3915
// Always returns -1, independent of 'bt'.
// NOTE(review): presumably -1 tells shared code that this platform has no
// scalable vector registers -- confirm against the callers.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
3919
3920 // Vector ideal reg corresponding to specified size in bytes
3921 uint Matcher::vector_ideal_reg(int size) {
3922 assert(MaxVectorSize >= size, "");
3923 switch(size) {
3924 case 4: return Op_VecS;
3925 case 8: return Op_VecD;
3926 case 16: return Op_VecX;
3927 case 32: return Op_VecY;
3928 case 64: return Op_VecZ;
3929 }
3930 ShouldNotReachHere();
3931 return 0;
3932 }
3933
3934 // Check for shift by small constant as well
3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3936 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3937 shift->in(2)->get_int() <= 3 &&
3938 // Are there other uses besides address expressions?
3939 !matcher->is_visited(shift)) {
3940 address_visited.set(shift->_idx); // Flag as address_visited
3941 mstack.push(shift->in(2), Matcher::Visit);
3942 Node *conv = shift->in(1);
3943 // Allow Matcher to match the rule which bypass
3944 // ConvI2L operation for an array index on LP64
3945 // if the index value is positive.
3946 if (conv->Opcode() == Op_ConvI2L &&
3947 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3948 // Are there other uses besides address expressions?
3949 !matcher->is_visited(conv)) {
3950 address_visited.set(conv->_idx); // Flag as address_visited
3951 mstack.push(conv->in(1), Matcher::Pre_Visit);
3952 } else {
3953 mstack.push(conv, Matcher::Pre_Visit);
3954 }
3955 return true;
3956 }
3957 return false;
3958 }
3959
3960 // This function identifies sub-graphs in which a 'load' node is
3961 // input to two different nodes, and such that it can be matched
3962 // with BMI instructions like blsi, blsr, etc.
3963 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3964 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3965 // refers to the same node.
3966 //
3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3968 // This is a temporary solution until we make DAGs expressible in ADL.
3969 template<typename ConType>
3970 class FusedPatternMatcher {
3971 Node* _op1_node;
3972 Node* _mop_node;
3973 int _con_op;
3974
3975 static int match_next(Node* n, int next_op, int next_op_idx) {
3976 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3977 return -1;
3978 }
3979
3980 if (next_op_idx == -1) { // n is commutative, try rotations
3981 if (n->in(1)->Opcode() == next_op) {
3982 return 1;
3983 } else if (n->in(2)->Opcode() == next_op) {
3984 return 2;
3985 }
3986 } else {
3987 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3988 if (n->in(next_op_idx)->Opcode() == next_op) {
3989 return next_op_idx;
3990 }
3991 }
3992 return -1;
3993 }
3994
3995 public:
3996 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3997 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3998
3999 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4000 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4001 typename ConType::NativeType con_value) {
4002 if (_op1_node->Opcode() != op1) {
4003 return false;
4004 }
4005 if (_mop_node->outcnt() > 2) {
4006 return false;
4007 }
4008 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4009 if (op1_op2_idx == -1) {
4010 return false;
4011 }
4012 // Memory operation must be the other edge
4013 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4014
4015 // Check that the mop node is really what we want
4016 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4017 Node* op2_node = _op1_node->in(op1_op2_idx);
4018 if (op2_node->outcnt() > 1) {
4019 return false;
4020 }
4021 assert(op2_node->Opcode() == op2, "Should be");
4022 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4023 if (op2_con_idx == -1) {
4024 return false;
4025 }
4026 // Memory operation must be the other edge
4027 int op2_mop_idx = (op2_con_idx & 1) + 1;
4028 // Check that the memory operation is the same node
4029 if (op2_node->in(op2_mop_idx) == _mop_node) {
4030 // Now check the constant
4031 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4032 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4033 return true;
4034 }
4035 }
4036 }
4037 return false;
4038 }
4039 };
4040
4041 static bool is_bmi_pattern(Node* n, Node* m) {
4042 assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
4043 if (n != nullptr && m != nullptr) {
4044 if (m->Opcode() == Op_LoadI) {
4045 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4046 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4047 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4048 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4049 } else if (m->Opcode() == Op_LoadL) {
4050 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4051 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4052 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4053 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4054 }
4055 }
4056 return false;
4057 }
4058
4059 // Should the matcher clone input 'm' of node 'n'?
4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4061 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4062 if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
4063 mstack.push(m, Visit);
4064 return true;
4065 }
4066 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4067 mstack.push(m, Visit); // m = ShiftCntV
4068 return true;
4069 }
4070 if (is_encode_and_store_pattern(n, m)) {
4071 mstack.push(m, Visit);
4072 return true;
4073 }
4074 return false;
4075 }
4076
4077 // Should the Matcher clone shifts on addressing modes, expecting them
4078 // to be subsumed into complex addressing expressions or compute them
4079 // into registers?
4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4081 Node *off = m->in(AddPNode::Offset);
4082 if (off->is_Con()) {
4083 address_visited.test_set(m->_idx); // Flag as address_visited
4084 Node *adr = m->in(AddPNode::Address);
4085
4086 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4087 // AtomicAdd is not an addressing expression.
4088 // Cheap to find it by looking for screwy base.
4089 if (adr->is_AddP() &&
4090 !adr->in(AddPNode::Base)->is_top() &&
4091 !adr->in(AddPNode::Offset)->is_Con() &&
4092 off->get_long() == (int) (off->get_long()) && // immL32
4093 // Are there other uses besides address expressions?
4094 !is_visited(adr)) {
4095 address_visited.set(adr->_idx); // Flag as address_visited
4096 Node *shift = adr->in(AddPNode::Offset);
4097 if (!clone_shift(shift, this, mstack, address_visited)) {
4098 mstack.push(shift, Pre_Visit);
4099 }
4100 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4101 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4102 } else {
4103 mstack.push(adr, Pre_Visit);
4104 }
4105
4106 // Clone X+offset as it also folds into most addressing expressions
4107 mstack.push(off, Visit);
4108 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4109 return true;
4110 } else if (clone_shift(off, this, mstack, address_visited)) {
4111 address_visited.test_set(m->_idx); // Flag as address_visited
4112 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4113 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4114 return true;
4115 }
4116 return false;
4117 }
4118
4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4120 switch (bt) {
4121 case BoolTest::eq:
4122 return Assembler::eq;
4123 case BoolTest::ne:
4124 return Assembler::neq;
4125 case BoolTest::le:
4126 case BoolTest::ule:
4127 return Assembler::le;
4128 case BoolTest::ge:
4129 case BoolTest::uge:
4130 return Assembler::nlt;
4131 case BoolTest::lt:
4132 case BoolTest::ult:
4133 return Assembler::lt;
4134 case BoolTest::gt:
4135 case BoolTest::ugt:
4136 return Assembler::nle;
4137 default : ShouldNotReachHere(); return Assembler::_false;
4138 }
4139 }
4140
4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4142 switch (bt) {
4143 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4144 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4145 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4146 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4147 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4148 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4149 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4150 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4151 }
4152 }
4153
4154 // Helper methods for MachSpillCopyNode::implementation().
4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4156 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4157 assert(ireg == Op_VecS || // 32bit vector
4158 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4159 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4160 "no non-adjacent vector moves" );
4161 if (masm) {
4162 switch (ireg) {
4163 case Op_VecS: // copy whole register
4164 case Op_VecD:
4165 case Op_VecX:
4166 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4167 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4168 } else {
4169 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4170 }
4171 break;
4172 case Op_VecY:
4173 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4174 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4175 } else {
4176 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4177 }
4178 break;
4179 case Op_VecZ:
4180 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4181 break;
4182 default:
4183 ShouldNotReachHere();
4184 }
4185 #ifndef PRODUCT
4186 } else {
4187 switch (ireg) {
4188 case Op_VecS:
4189 case Op_VecD:
4190 case Op_VecX:
4191 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4192 break;
4193 case Op_VecY:
4194 case Op_VecZ:
4195 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4196 break;
4197 default:
4198 ShouldNotReachHere();
4199 }
4200 #endif
4201 }
4202 }
4203
4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4205 int stack_offset, int reg, uint ireg, outputStream* st) {
4206 if (masm) {
4207 if (is_load) {
4208 switch (ireg) {
4209 case Op_VecS:
4210 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4211 break;
4212 case Op_VecD:
4213 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4214 break;
4215 case Op_VecX:
4216 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4217 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4218 } else {
4219 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4220 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4221 }
4222 break;
4223 case Op_VecY:
4224 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4225 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4226 } else {
4227 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4228 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4229 }
4230 break;
4231 case Op_VecZ:
4232 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4233 break;
4234 default:
4235 ShouldNotReachHere();
4236 }
4237 } else { // store
4238 switch (ireg) {
4239 case Op_VecS:
4240 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4241 break;
4242 case Op_VecD:
4243 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4244 break;
4245 case Op_VecX:
4246 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4247 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4248 }
4249 else {
4250 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4251 }
4252 break;
4253 case Op_VecY:
4254 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4255 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4256 }
4257 else {
4258 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4259 }
4260 break;
4261 case Op_VecZ:
4262 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4263 break;
4264 default:
4265 ShouldNotReachHere();
4266 }
4267 }
4268 #ifndef PRODUCT
4269 } else {
4270 if (is_load) {
4271 switch (ireg) {
4272 case Op_VecS:
4273 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4274 break;
4275 case Op_VecD:
4276 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4277 break;
4278 case Op_VecX:
4279 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4280 break;
4281 case Op_VecY:
4282 case Op_VecZ:
4283 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4284 break;
4285 default:
4286 ShouldNotReachHere();
4287 }
4288 } else { // store
4289 switch (ireg) {
4290 case Op_VecS:
4291 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4292 break;
4293 case Op_VecD:
4294 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4295 break;
4296 case Op_VecX:
4297 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4298 break;
4299 case Op_VecY:
4300 case Op_VecZ:
4301 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4302 break;
4303 default:
4304 ShouldNotReachHere();
4305 }
4306 }
4307 #endif
4308 }
4309 }
4310
4311 template <class T>
4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4313 int size = type2aelembytes(bt) * len;
4314 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4315 for (int i = 0; i < len; i++) {
4316 int offset = i * type2aelembytes(bt);
4317 switch (bt) {
4318 case T_BYTE: val->at(i) = con; break;
4319 case T_SHORT: {
4320 jshort c = con;
4321 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4322 break;
4323 }
4324 case T_INT: {
4325 jint c = con;
4326 memcpy(val->adr_at(offset), &c, sizeof(jint));
4327 break;
4328 }
4329 case T_LONG: {
4330 jlong c = con;
4331 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4332 break;
4333 }
4334 case T_FLOAT: {
4335 jfloat c = con;
4336 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4337 break;
4338 }
4339 case T_DOUBLE: {
4340 jdouble c = con;
4341 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4342 break;
4343 }
4344 default: assert(false, "%s", type2name(bt));
4345 }
4346 }
4347 return val;
4348 }
4349
4350 static inline jlong high_bit_set(BasicType bt) {
4351 switch (bt) {
4352 case T_BYTE: return 0x8080808080808080;
4353 case T_SHORT: return 0x8000800080008000;
4354 case T_INT: return 0x8000000080000000;
4355 case T_LONG: return 0x8000000000000000;
4356 default:
4357 ShouldNotReachHere();
4358 return 0;
4359 }
4360 }
4361
// Non-PRODUCT listing for the nop padding node: prints the node's pad size
// (_count bytes) to 'st'.
4362 #ifndef PRODUCT
4363 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4364 st->print("nop \t# %d bytes pad for loops and calls", _count);
4365 }
4366 #endif
4367
// Emits the padding by passing _count to the assembler's nop().
4368 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4369 __ nop(_count);
4370 }
4371
// Size of this node is its pad count, _count.
4372 uint MachNopNode::size(PhaseRegAlloc*) const {
4373 return _count;
4374 }
4375
// Non-PRODUCT listing for the breakpoint node.
4376 #ifndef PRODUCT
4377 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4378 st->print("# breakpoint");
4379 }
4380 #endif
4381
// Emits an int3 instruction.
4382 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4383 __ int3();
4384 }
4385
// No fixed size is hard-coded here; the computation is delegated to MachNode::size().
4386 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4387 return MachNode::size(ra_);
4388 }
4389
4390 %}
4391
4392 //----------ENCODING BLOCK-----------------------------------------------------
4393 // This block specifies the encoding classes used by the compiler to
4394 // output byte streams. Encoding classes are parameterized macros
4395 // used by Machine Instruction Nodes in order to generate the bit
4396 // encoding of the instruction. Operands specify their base encoding
4397 // interface with the interface keyword. There are currently
4398 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
4399 // COND_INTER. REG_INTER causes an operand to generate a function
4400 // which returns its register number when queried. CONST_INTER causes
4401 // an operand to generate a function which returns the value of the
4402 // constant when queried. MEMORY_INTER causes an operand to generate
4403 // four functions which return the Base Register, the Index Register,
4404 // the Scale Value, and the Offset Value of the operand when queried.
4405 // COND_INTER causes an operand to generate six functions which return
4406 // the encoding code (ie - encoding bits for the instruction)
4407 // associated with each basic boolean condition for a conditional
4408 // instruction.
4409 //
4410 // Instructions specify two basic values for encoding. Again, a
4411 // function is available to check if the constant displacement is an
4412 // oop. They use the ins_encode keyword to specify their encoding
4413 // classes (which must be a sequence of enc_class names, and their
4414 // parameters, specified in the encoding block), and they use the
4415 // opcode keyword to specify, in order, their primary, secondary, and
4416 // tertiary opcode. Only the opcode sections which a particular
4417 // instruction needs for encoding need to be specified.
4418 encode %{
// Encoding class for 32-bit signed divide/remainder with divisor register 'div'
// (dividend in rax, results in rax/rdx). Emits the min_int / -1 special-case
// guard followed by cdql + idivl, as laid out in the listing below.
4419 enc_class cdql_enc(no_rax_rdx_RegI div)
4420 %{
4421 // Full implementation of Java idiv and irem; checks for
4422 // special case as described in JVM spec., p.243 & p.271.
4423 //
4424 // normal case special case
4425 //
4426 // input : rax: dividend min_int
4427 // reg: divisor -1
4428 //
4429 // output: rax: quotient (= rax idiv reg) min_int
4430 // rdx: remainder (= rax irem reg) 0
4431 //
4432 // Code sequence:
4433 //
4434 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4435 // 5: 75 07/08 jne e <normal>
4436 // 7: 33 d2 xor %edx,%edx
4437 // [div >= 8 -> offset + 1]
4438 // [REX_B]
4439 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4440 // c: 74 03/04 je 11 <done>
4441 // 000000000000000e <normal>:
4442 // e: 99 cltd
4443 // [div >= 8 -> offset + 1]
4444 // [REX_B]
4445 // f: f7 f9 idiv $div
4446 // 0000000000000011 <done>:
4447 Label normal;
4448 Label done;
4449
4450 // cmp $0x80000000,%eax
4451 __ cmpl(as_Register(RAX_enc), 0x80000000);
4452
4453 // jne e <normal>
4454 __ jccb(Assembler::notEqual, normal);
4455
4456 // xor %edx,%edx
4457 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4458
4459 // cmp $0xffffffffffffffff,$div
4460 __ cmpl($div$$Register, -1);
4461
4462 // je 11 <done>
4463 __ jccb(Assembler::equal, done);
4464
4465 // <normal>
4466 // cltd
4467 __ bind(normal);
4468 __ cdql();
4469
4470 // idivl $div
4471 // <done>
4472 __ idivl($div$$Register);
4473 __ bind(done);
4474 %}
4475
// Encoding class for 64-bit signed divide/remainder with divisor register 'div'
// (dividend in rax, results in rax/rdx). Emits the min_long / -1 special-case
// guard followed by cdqq + idivq, as laid out in the listing below.
// rdx is used as scratch for the min_long constant before the compare.
4476 enc_class cdqq_enc(no_rax_rdx_RegL div)
4477 %{
4478 // Full implementation of Java ldiv and lrem; checks for
4479 // special case as described in JVM spec., p.243 & p.271.
4480 //
4481 // normal case special case
4482 //
4483 // input : rax: dividend min_long
4484 // reg: divisor -1
4485 //
4486 // output: rax: quotient (= rax idiv reg) min_long
4487 // rdx: remainder (= rax irem reg) 0
4488 //
4489 // Code sequence:
4490 //
4491 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4492 // 7: 00 00 80
4493 // a: 48 39 d0 cmp %rdx,%rax
4494 // d: 75 08 jne 17 <normal>
4495 // f: 33 d2 xor %edx,%edx
4496 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4497 // 15: 74 05 je 1c <done>
4498 // 0000000000000017 <normal>:
4499 // 17: 48 99 cqto
4500 // 19: 48 f7 f9 idiv $div
4501 // 000000000000001c <done>:
4502 Label normal;
4503 Label done;
4504
4505 // mov $0x8000000000000000,%rdx
4506 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4507
4508 // cmp %rdx,%rax
4509 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4510
4511 // jne 17 <normal>
4512 __ jccb(Assembler::notEqual, normal);
4513
4514 // xor %edx,%edx
4515 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4516
4517 // cmp $0xffffffffffffffff,$div
4518 __ cmpq($div$$Register, -1);
4519
4520 // je 1c <done>
4521 __ jccb(Assembler::equal, done);
4522
4523 // <normal>
4524 // cqto
4525 __ bind(normal);
4526 __ cdqq();
4527
4528 // idivq $div (emitted here, as part of this encoding)
4529 // <done>
4530 __ idivq($div$$Register);
4531 __ bind(done);
4532 %}
4533
// Emits vzeroupper when generate_vzeroupper() requests it for the current
// compilation. With assertions enabled, also checks that the number of bytes
// emitted equals clear_avx_size().
4534 enc_class clear_avx %{
4535 DEBUG_ONLY(int off0 = __ offset());
4536 if (generate_vzeroupper(Compile::current())) {
4537 // Clear upper bits of YMM registers when the current compiled code
4538 // uses wide vectors, to avoid the AVX <-> SSE transition penalty
4539 // during the call.
4540 __ vzeroupper();
4541 }
4542 DEBUG_ONLY(int off1 = __ offset());
4543 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4544 %}
4545
// Call from compiled Java code to the runtime entry 'meth': loads the target
// address into r10, calls through r10, then emits the post-call nop.
4546 enc_class Java_To_Runtime(method meth) %{
4547 __ lea(r10, RuntimeAddress((address)$meth$$method));
4548 __ call(r10);
4549 __ post_call_nop();
4550 %}
4551
// Encoding for a Java static (or optimized virtual) call to 'meth'.
// Three cases, selected by _method:
//   - no _method: a plain call to the runtime address 'meth';
//   - _method is the _ensureMaterializedForStackWalk intrinsic: the call is
//     elided and replaced by a 5-byte nop;
//   - otherwise: a relocated call, plus either a shared or a freshly emitted
//     to-interpreter stub.
// A post-call nop follows, except when stub emission fails: that path records
// a compilation failure and returns early.
4552 enc_class Java_Static_Call(method meth)
4553 %{
4554 // JAVA STATIC CALL
4555 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4556 // determine who we intended to call.
4557 if (!_method) {
4558 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4559 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4560 // The NOP here is purely to ensure that eliding a call to
4561 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4562 __ nop(5);
4563 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4564 } else {
// The relocation kind depends on whether this is an optimized virtual call.
4565 int method_index = resolved_method_index(masm);
4566 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4567 : static_call_Relocation::spec(method_index);
// 'mark' and 'call_offset' both record the position of the call instruction.
4568 address mark = __ pc();
4569 int call_offset = __ offset();
4570 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4571 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4572 // Calls of the same statically bound method can share
4573 // a stub to the interpreter.
4574 __ code()->shared_stub_to_interp_for(_method, call_offset);
4575 } else {
4576 // Emit stubs for static call.
4577 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4578 __ clear_inst_mark();
4579 if (stub == nullptr) {
// Stub emission failed: record the failure and bail out of this encoding.
4580 ciEnv::current()->record_failure("CodeCache is full");
4581 return;
4582 }
4583 }
4584 }
4585 __ post_call_nop();
4586 %}
4587
// Encoding for a Java dynamic call: emits an inline-cache call (ic_call) to
// 'meth' using the resolved method index, followed by the post-call nop.
4588 enc_class Java_Dynamic_Call(method meth) %{
4589 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4590 __ post_call_nop();
4591 %}
4592
// Emitted after a call. Does nothing unless VerifyStackAtCalls is set; then it
// compares the stack word at the computed frame offset with the cookie
// 0xbadb100d and executes int3 on a mismatch.
4593 enc_class call_epilog %{
4594 if (VerifyStackAtCalls) {
4595 // Check that stack depth is unchanged: find magic cookie on stack
4596 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4597 Label L;
4598 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4599 __ jccb(Assembler::equal, L);
4600 // Die if stack mismatch
4601 __ int3();
4602 __ bind(L);
4603 }
4604 %}
4605
4606 %}
4607
4608 //----------FRAME--------------------------------------------------------------
4609 // Definition of frame structure and management information.
4610 //
4611 // S T A C K L A Y O U T Allocators stack-slot number
4612 // | (to get allocators register number
4613 // G Owned by | | v add OptoReg::stack0())
4614 // r CALLER | |
4615 // o | +--------+ pad to even-align allocators stack-slot
4616 // w V | pad0 | numbers; owned by CALLER
4617 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4618 // h ^ | in | 5
4619 // | | args | 4 Holes in incoming args owned by SELF
4620 // | | | | 3
4621 // | | +--------+
4622 // V | | old out| Empty on Intel, window on Sparc
4623 // | old |preserve| Must be even aligned.
4624 // | SP-+--------+----> Matcher::_old_SP, even aligned
4625 // | | in | 3 area for Intel ret address
4626 // Owned by |preserve| Empty on Sparc.
4627 // SELF +--------+
4628 // | | pad2 | 2 pad to align old SP
4629 // | +--------+ 1
4630 // | | locks | 0
4631 // | +--------+----> OptoReg::stack0(), even aligned
4632 // | | pad1 | 11 pad to align new SP
4633 // | +--------+
4634 // | | | 10
4635 // | | spills | 9 spills
4636 // V | | 8 (pad0 slot for callee)
4637 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4638 // ^ | out | 7
4639 // | | args | 6 Holes in outgoing args owned by CALLEE
4640 // Owned by +--------+
4641 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4642 // | new |preserve| Must be even-aligned.
4643 // | SP-+--------+----> Matcher::_new_SP, even aligned
4644 // | | |
4645 //
4646 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4647 // known from SELF's arguments and the Java calling convention.
4648 // Region 6-7 is determined per call site.
4649 // Note 2: If the calling convention leaves holes in the incoming argument
4650 // area, those holes are owned by SELF. Holes in the outgoing area
4651 // are owned by the CALLEE. Holes should not be necessary in the
4652 // incoming area, as the Java calling convention is completely under
4653 // the control of the AD file. Doubles can be sorted and packed to
4654 // avoid holes. Holes in the outgoing arguments may be necessary for
4655 // varargs C calling conventions.
4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4657 // even aligned with pad0 as needed.
4658 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4659 // region 6-11 is even aligned; it may be padded out more so that
4660 // the region from SP to FP meets the minimum stack alignment.
4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4662 // alignment. Region 11, pad1, may be dynamically extended so that
4663 // SP meets the minimum alignment.
4664
// Frame description: inline cache register, cisc-spill operand, lock slot
// count, frame pointer, stack alignment, C varargs kill area, return address
// location, and the registers used for compiled Java return values.
4665 frame
4666 %{
4667 // The inline cache register is part of the calling convention
4668 // between compiled code and the interpreter.
4669 inline_cache_reg(RAX); // Inline Cache Register
4670
4671 // Optional: name the operand used by cisc-spilling to access
4672 // [stack_pointer + offset]
4673 cisc_spilling_operand_name(indOffset32);
4674
4675 // Number of stack slots consumed by locking an object
4676 sync_stack_slots(2);
4677
4678 // Compiled code's Frame Pointer
4679 frame_pointer(RSP);
4680
4681 // Stack alignment requirement
4682 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4683
4684 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4685 // for calls to C. Supports the var-args backing area for register parms.
4686 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4687
4688 // The after-PROLOG location of the return address. Location of
4689 // return address specifies a type (REG or STACK) and a number
4690 // representing the register number (i.e. - use a register name) or
4691 // stack slot.
4692 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4693 // Otherwise, it is above the locks and verification slot and alignment word
4694 return_addr(STACK - 2 +
4695 align_up((Compile::current()->in_preserve_stack_slots() +
4696 Compile::current()->fixed_slots()),
4697 stack_alignment_in_slots()));
4698
4699 // Location of compiled Java return values. Same as C for now.
// The lo[] and hi[] tables below are indexed by ideal register type; hi[] is
// OptoReg::Bad for the types listed with no second half (RegN, RegI, RegF).
4700 return_value
4701 %{
4702 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4703 "only return normal values");
4704
4705 static const int lo[Op_RegL + 1] = {
4706 0,
4707 0,
4708 RAX_num, // Op_RegN
4709 RAX_num, // Op_RegI
4710 RAX_num, // Op_RegP
4711 XMM0_num, // Op_RegF
4712 XMM0_num, // Op_RegD
4713 RAX_num // Op_RegL
4714 };
4715 static const int hi[Op_RegL + 1] = {
4716 0,
4717 0,
4718 OptoReg::Bad, // Op_RegN
4719 OptoReg::Bad, // Op_RegI
4720 RAX_H_num, // Op_RegP
4721 OptoReg::Bad, // Op_RegF
4722 XMM0b_num, // Op_RegD
4723 RAX_H_num // Op_RegL
4724 };
4725 // Excluded flags and vector registers.
4726 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4727 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4728 %}
4729 %}
4730
4731 //----------ATTRIBUTES---------------------------------------------------------
4732 //----------Operand Attributes-------------------------------------------------
// Attribute declarations. The value in parentheses is presumably the default
// used when an operand or instruction does not set the attribute itself
// (operands below set op_cost explicitly) - confirm against the ADLC docs.
4733 op_attrib op_cost(0); // Required cost attribute
4734
4735 //----------Instruction Attributes---------------------------------------------
4736 ins_attrib ins_cost(100); // Required cost attribute
4737 ins_attrib ins_size(8); // Required size attribute (in bits)
4738 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4739 // a non-matching short branch variant
4740 // of some long branch?
4741 ins_attrib ins_alignment(1); // Required alignment attribute (must
4742 // be a power of 2) specifies the
4743 // alignment that some part of the
4744 // instruction (not necessarily the
4745 // start) requires. If > 1, a
4746 // compute_padding() function must be
4747 // provided for the instruction
4748
4749 // Whether this node is expanded during code emission into a sequence of
4750 // instructions and the first instruction can perform an implicit null check.
4751 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4752
4753 //----------OPERANDS-----------------------------------------------------------
4754 // Operand definitions must precede instruction definitions for correct parsing
4755 // in the ADLC because operands constitute user defined types which are used in
4756 // instruction definitions.
4757
4758 //----------Simple Operands----------------------------------------------------
4759 // Immediate Operands
4760 // Integer Immediate: matches any int constant (ConI).
4761 operand immI()
4762 %{
4763 match(ConI);
4764
4765 op_cost(10);
4766 format %{ %}
4767 interface(CONST_INTER);
4768 %}
4769
4770 // Constant for test vs zero: int constant equal to 0.
4771 operand immI_0()
4772 %{
4773 predicate(n->get_int() == 0);
4774 match(ConI);
4775
4776 op_cost(0);
4777 format %{ %}
4778 interface(CONST_INTER);
4779 %}
4780
4781 // Constant for increment: int constant equal to 1.
4782 operand immI_1()
4783 %{
4784 predicate(n->get_int() == 1);
4785 match(ConI);
4786
4787 op_cost(0);
4788 format %{ %}
4789 interface(CONST_INTER);
4790 %}
4791
4792 // Constant for decrement: int constant equal to -1.
4793 operand immI_M1()
4794 %{
4795 predicate(n->get_int() == -1);
4796 match(ConI);
4797
4798 op_cost(0);
4799 format %{ %}
4800 interface(CONST_INTER);
4801 %}
4802
// Int constant equal to 2.
4803 operand immI_2()
4804 %{
4805 predicate(n->get_int() == 2);
4806 match(ConI);
4807
4808 op_cost(0);
4809 format %{ %}
4810 interface(CONST_INTER);
4811 %}
4812
// Int constant equal to 4.
4813 operand immI_4()
4814 %{
4815 predicate(n->get_int() == 4);
4816 match(ConI);
4817
4818 op_cost(0);
4819 format %{ %}
4820 interface(CONST_INTER);
4821 %}
4822
// Int constant equal to 8.
4823 operand immI_8()
4824 %{
4825 predicate(n->get_int() == 8);
4826 match(ConI);
4827
4828 op_cost(0);
4829 format %{ %}
4830 interface(CONST_INTER);
4831 %}
4832
4833 // Valid scale values for addressing modes: int constant in [0, 3].
4834 operand immI2()
4835 %{
4836 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4837 match(ConI);
4838
4839 format %{ %}
4840 interface(CONST_INTER);
4841 %}
4842
// Unsigned 7-bit int immediate: int constant in [0, 0x7F].
4843 operand immU7()
4844 %{
4845 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4846 match(ConI);
4847
4848 op_cost(5);
4849 format %{ %}
4850 interface(CONST_INTER);
4851 %}
4852
// Signed 8-bit int immediate: int constant in [-0x80, 0x7F].
4853 operand immI8()
4854 %{
4855 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4856 match(ConI);
4857
4858 op_cost(5);
4859 format %{ %}
4860 interface(CONST_INTER);
4861 %}
4862
// Unsigned 8-bit int immediate: int constant in [0, 255].
4863 operand immU8()
4864 %{
4865 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4866 match(ConI);
4867
4868 op_cost(5);
4869 format %{ %}
4870 interface(CONST_INTER);
4871 %}
4872
// Signed 16-bit int immediate: int constant in [-32768, 32767].
4873 operand immI16()
4874 %{
4875 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4876 match(ConI);
4877
4878 op_cost(10);
4879 format %{ %}
4880 interface(CONST_INTER);
4881 %}
4882
4883 // Int Immediate non-negative: int constant >= 0.
4884 operand immU31()
4885 %{
4886 predicate(n->get_int() >= 0);
4887 match(ConI);
4888
4889 op_cost(0);
4890 format %{ %}
4891 interface(CONST_INTER);
4892 %}
4893
4894 // Pointer Immediate: matches any pointer constant (ConP).
4895 operand immP()
4896 %{
4897 match(ConP);
4898
4899 op_cost(10);
4900 format %{ %}
4901 interface(CONST_INTER);
4902 %}
4903
4904 // Null Pointer Immediate: pointer constant whose value is 0.
4905 operand immP0()
4906 %{
4907 predicate(n->get_ptr() == 0);
4908 match(ConP);
4909
4910 op_cost(5);
4911 format %{ %}
4912 interface(CONST_INTER);
4913 %}
4914
4915 // Narrow Pointer Immediate: matches any narrow pointer constant (ConN).
4916 operand immN() %{
4917 match(ConN);
4918
4919 op_cost(10);
4920 format %{ %}
4921 interface(CONST_INTER);
4922 %}
4923
// Narrow Klass Pointer Immediate: matches any ConNKlass constant.
4924 operand immNKlass() %{
4925 match(ConNKlass);
4926
4927 op_cost(10);
4928 format %{ %}
4929 interface(CONST_INTER);
4930 %}
4931
4932 // Narrow Null Pointer Immediate: ConN whose narrow constant is 0.
4933 operand immN0() %{
4934 predicate(n->get_narrowcon() == 0);
4935 match(ConN);
4936
4937 op_cost(5);
4938 format %{ %}
4939 interface(CONST_INTER);
4940 %}
4941
// Pointer immediate that needs no relocation and whose value has no bits set
// at or above bit 31 (value >> 31 == 0).
4942 operand immP31()
4943 %{
4944 predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
4945 && (n->get_ptr() >> 31) == 0);
4946 match(ConP);
4947
4948 op_cost(5);
4949 format %{ %}
4950 interface(CONST_INTER);
4951 %}
4952
4953
4954 // Long Immediate: matches any long constant (ConL).
4955 operand immL()
4956 %{
4957 match(ConL);
4958
4959 op_cost(20);
4960 format %{ %}
4961 interface(CONST_INTER);
4962 %}
4963
4964 // Long Immediate 8-bit: long constant in [-0x80, 0x7F].
4965 operand immL8()
4966 %{
4967 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4968 match(ConL);
4969
4970 op_cost(5);
4971 format %{ %}
4972 interface(CONST_INTER);
4973 %}
4974
4975 // Long Immediate 32-bit unsigned: value survives a round trip through unsigned int.
4976 operand immUL32()
4977 %{
4978 predicate(n->get_long() == (unsigned int) (n->get_long()));
4979 match(ConL);
4980
4981 op_cost(10);
4982 format %{ %}
4983 interface(CONST_INTER);
4984 %}
4985
4986 // Long Immediate 32-bit signed: value survives a round trip through int.
4987 operand immL32()
4988 %{
4989 predicate(n->get_long() == (int) (n->get_long()));
4990 match(ConL);
4991
4992 op_cost(15);
4993 format %{ %}
4994 interface(CONST_INTER);
4995 %}
4996
// Long immediate that, viewed as unsigned, is a power of 2.
4997 operand immL_Pow2()
4998 %{
4999 predicate(is_power_of_2((julong)n->get_long()));
5000 match(ConL);
5001
5002 op_cost(15);
5003 format %{ %}
5004 interface(CONST_INTER);
5005 %}
5006
// Long immediate whose bitwise complement, viewed as unsigned, is a power of 2.
5007 operand immL_NotPow2()
5008 %{
5009 predicate(is_power_of_2((julong)~n->get_long()));
5010 match(ConL);
5011
5012 op_cost(15);
5013 format %{ %}
5014 interface(CONST_INTER);
5015 %}
5016
5017 // Long Immediate zero: long constant equal to 0.
5018 operand immL0()
5019 %{
5020 predicate(n->get_long() == 0L);
5021 match(ConL);
5022
5023 op_cost(10);
5024 format %{ %}
5025 interface(CONST_INTER);
5026 %}
5027
5028 // Constant for increment: long constant equal to 1.
5029 operand immL1()
5030 %{
5031 predicate(n->get_long() == 1);
5032 match(ConL);
5033
5034 format %{ %}
5035 interface(CONST_INTER);
5036 %}
5037
5038 // Constant for decrement: long constant equal to -1.
5039 operand immL_M1()
5040 %{
5041 predicate(n->get_long() == -1);
5042 match(ConL);
5043
5044 format %{ %}
5045 interface(CONST_INTER);
5046 %}
5047
5048 // Long Immediate: low 32-bit mask (long constant equal to 0xFFFFFFFF).
5049 operand immL_32bits()
5050 %{
5051 predicate(n->get_long() == 0xFFFFFFFFL);
5052 match(ConL);
5053 op_cost(20);
5054
5055 format %{ %}
5056 interface(CONST_INTER);
5057 %}
5058
5059 // Int Immediate: 2^n-1, positive (value > 0 and value + 1 is a power of 2).
5060 operand immI_Pow2M1()
5061 %{
5062 predicate((n->get_int() > 0)
5063 && is_power_of_2((juint)n->get_int() + 1));
5064 match(ConI);
5065
5066 op_cost(20);
5067 format %{ %}
5068 interface(CONST_INTER);
5069 %}
5070
5071 // Float Immediate zero: float constant for which jint_cast() yields 0.
// NOTE(review): presumably jint_cast exposes the raw bits, so only +0.0f
// (not -0.0f) would match - confirm.
5072 operand immF0()
5073 %{
5074 predicate(jint_cast(n->getf()) == 0);
5075 match(ConF);
5076
5077 op_cost(5);
5078 format %{ %}
5079 interface(CONST_INTER);
5080 %}
5081
5082 // Float Immediate: matches any float constant (ConF).
5083 operand immF()
5084 %{
5085 match(ConF);
5086
5087 op_cost(15);
5088 format %{ %}
5089 interface(CONST_INTER);
5090 %}
5091
5092 // Half Float Immediate: matches any half-float constant (ConH).
5093 operand immH()
5094 %{
5095 match(ConH);
5096
5097 op_cost(15);
5098 format %{ %}
5099 interface(CONST_INTER);
5100 %}
5101
5102 // Double Immediate zero: double constant for which jlong_cast() yields 0.
// NOTE(review): presumably jlong_cast exposes the raw bits, so only +0.0
// (not -0.0) would match - confirm.
5103 operand immD0()
5104 %{
5105 predicate(jlong_cast(n->getd()) == 0);
5106 match(ConD);
5107
5108 op_cost(5);
5109 format %{ %}
5110 interface(CONST_INTER);
5111 %}
5112
5113 // Double Immediate: matches any double constant (ConD).
5114 operand immD()
5115 %{
5116 match(ConD);
5117
5118 op_cost(15);
5119 format %{ %}
5120 interface(CONST_INTER);
5121 %}
5122
5123 // Immediates for special shifts (sign extend)
5124
5125 // Int constant equal to 16 (one of the special shift counts).
5126 operand immI_16()
5127 %{
5128 predicate(n->get_int() == 16);
5129 match(ConI);
5130
5131 format %{ %}
5132 interface(CONST_INTER);
5133 %}
5134
// Int constant equal to 24 (one of the special shift counts).
5135 operand immI_24()
5136 %{
5137 predicate(n->get_int() == 24);
5138 match(ConI);
5139
5140 format %{ %}
5141 interface(CONST_INTER);
5142 %}
5143
5144 // Constant for byte-wide masking: int constant equal to 255.
5145 operand immI_255()
5146 %{
5147 predicate(n->get_int() == 255);
5148 match(ConI);
5149
5150 format %{ %}
5151 interface(CONST_INTER);
5152 %}
5153
5154 // Constant for short-wide masking: int constant equal to 65535.
5155 operand immI_65535()
5156 %{
5157 predicate(n->get_int() == 65535);
5158 match(ConI);
5159
5160 format %{ %}
5161 interface(CONST_INTER);
5162 %}
5163
5164 // Constant for byte-wide masking: long constant equal to 255.
5165 operand immL_255()
5166 %{
5167 predicate(n->get_long() == 255);
5168 match(ConL);
5169
5170 format %{ %}
5171 interface(CONST_INTER);
5172 %}
5173
5174 // Constant for short-wide masking: long constant equal to 65535.
5175 operand immL_65535()
5176 %{
5177 predicate(n->get_long() == 65535);
5178 match(ConL);
5179
5180 format %{ %}
5181 interface(CONST_INTER);
5182 %}
5183
5184 // AOT Runtime Constants Address: pointer constant accepted by
// AOTRuntimeConstants::contains().
5185 operand immAOTRuntimeConstantsAddress()
5186 %{
5187 // Check if the address is in the range of AOT Runtime Constants
5188 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5189 match(ConP);
5190
5191 op_cost(0);
5192 format %{ %}
5193 interface(CONST_INTER);
5194 %}
5195
// Vector mask register operand: matches RegVectMask, allocated from the
// vectmask_reg register class.
5196 operand kReg()
5197 %{
5198 constraint(ALLOC_IN_RC(vectmask_reg));
5199 match(RegVectMask);
5200 format %{%}
5201 interface(REG_INTER);
5202 %}
5203
5204 // Register Operands
5205 // Integer Register
// General int register operand allocated from the int_reg class. Besides
// RegI it also matches the fixed-register int operands listed below.
5206 operand rRegI()
5207 %{
5208 constraint(ALLOC_IN_RC(int_reg));
5209 match(RegI);
5210
5211 match(rax_RegI);
5212 match(rbx_RegI);
5213 match(rcx_RegI);
5214 match(rdx_RegI);
5215 match(rdi_RegI);
5216
5217 format %{ %}
5218 interface(REG_INTER);
5219 %}
5220
5221 // Special Registers
// Int operand constrained to the int_rax_reg class; printed as "RAX".
5222 operand rax_RegI()
5223 %{
5224 constraint(ALLOC_IN_RC(int_rax_reg));
5225 match(RegI);
5226 match(rRegI);
5227
5228 format %{ "RAX" %}
5229 interface(REG_INTER);
5230 %}
5231
5232 // Special Registers
// Int operand constrained to the int_rbx_reg class; printed as "RBX".
5233 operand rbx_RegI()
5234 %{
5235 constraint(ALLOC_IN_RC(int_rbx_reg));
5236 match(RegI);
5237 match(rRegI);
5238
5239 format %{ "RBX" %}
5240 interface(REG_INTER);
5241 %}
5242
// Int operand constrained to the int_rcx_reg class; printed as "RCX".
5243 operand rcx_RegI()
5244 %{
5245 constraint(ALLOC_IN_RC(int_rcx_reg));
5246 match(RegI);
5247 match(rRegI);
5248
5249 format %{ "RCX" %}
5250 interface(REG_INTER);
5251 %}
5252
// Int operand constrained to the int_rdx_reg class; printed as "RDX".
5253 operand rdx_RegI()
5254 %{
5255 constraint(ALLOC_IN_RC(int_rdx_reg));
5256 match(RegI);
5257 match(rRegI);
5258
5259 format %{ "RDX" %}
5260 interface(REG_INTER);
5261 %}
5262
// Int operand constrained to the int_rdi_reg class; printed as "RDI".
5263 operand rdi_RegI()
5264 %{
5265 constraint(ALLOC_IN_RC(int_rdi_reg));
5266 match(RegI);
5267 match(rRegI);
5268
5269 format %{ "RDI" %}
5270 interface(REG_INTER);
5271 %}
5272
// Int operand allocated from the int_no_rax_rdx_reg class. Of the
// fixed-register int operands it matches only rbx, rcx and rdi; rax_RegI and
// rdx_RegI are not listed.
5273 operand no_rax_rdx_RegI()
5274 %{
5275 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5276 match(RegI);
5277 match(rbx_RegI);
5278 match(rcx_RegI);
5279 match(rdi_RegI);
5280
5281 format %{ %}
5282 interface(REG_INTER);
5283 %}
5284
// Int operand allocated from the int_no_rbp_r13_reg class. It matches RegI,
// rRegI and each of the fixed-register int operands (rax, rbx, rcx, rdx, rdi).
5285 operand no_rbp_r13_RegI()
5286 %{
5287 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5288 match(RegI);
5289 match(rRegI);
5290 match(rax_RegI);
5291 match(rbx_RegI);
5292 match(rcx_RegI);
5293 match(rdx_RegI);
5294 match(rdi_RegI);
5295
5296 format %{ %}
5297 interface(REG_INTER);
5298 %}
5299
5300 // Pointer Register
// Pointer operand allocated from the any_reg class. It matches RegP, rRegP and
// the fixed-register pointer operands rax, rbx, rdi, rsi, rbp and r15.
5301 operand any_RegP()
5302 %{
5303 constraint(ALLOC_IN_RC(any_reg));
5304 match(RegP);
5305 match(rax_RegP);
5306 match(rbx_RegP);
5307 match(rdi_RegP);
5308 match(rsi_RegP);
5309 match(rbp_RegP);
5310 match(r15_RegP);
5311 match(rRegP);
5312
5313 format %{ %}
5314 interface(REG_INTER);
5315 %}
5316
// General pointer register operand allocated from the ptr_reg class. It also
// matches the fixed-register pointer operands listed below, including
// rbp_RegP and r15_RegP.
5317 operand rRegP()
5318 %{
5319 constraint(ALLOC_IN_RC(ptr_reg));
5320 match(RegP);
5321 match(rax_RegP);
5322 match(rbx_RegP);
5323 match(rdi_RegP);
5324 match(rsi_RegP);
5325 match(rbp_RegP); // See Q&A below about
5326 match(r15_RegP); // r15_RegP and rbp_RegP.
5327
5328 format %{ %}
5329 interface(REG_INTER);
5330 %}
5331
// Register operand for RegN values. Note that it is allocated from the
// int_reg class, the same class rRegI uses.
5332 operand rRegN() %{
5333 constraint(ALLOC_IN_RC(int_reg));
5334 match(RegN);
5335
5336 format %{ %}
5337 interface(REG_INTER);
5338 %}
5339
5340 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5341 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5342 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5343 // The output of an instruction is controlled by the allocator, which respects
5344 // register class masks, not match rules. Unless an instruction mentions
5345 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5346 // by the allocator as an input.
5347 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5348 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5349 // result, RBP is not included in the output of the instruction either.
5350
5351 // This operand is not allowed to use RBP even if
5352 // RBP is not used to hold the frame pointer.
// Allocated from the ptr_reg_no_rbp class; of the fixed-register pointer
// operands it matches only rbx, rsi and rdi.
5353 operand no_rbp_RegP()
5354 %{
5355 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5356 match(RegP);
5357 match(rbx_RegP);
5358 match(rsi_RegP);
5359 match(rdi_RegP);
5360
5361 format %{ %}
5362 interface(REG_INTER);
5363 %}
5364
5365 // Special Registers
5366 // Return a pointer value
// Pointer operand constrained to the ptr_rax_reg class.
5367 operand rax_RegP()
5368 %{
5369 constraint(ALLOC_IN_RC(ptr_rax_reg));
5370 match(RegP);
5371 match(rRegP);
5372
5373 format %{ %}
5374 interface(REG_INTER);
5375 %}
5376
5377 // Special Registers
5378 // Return a compressed pointer value
// RegN operand constrained to the int_rax_reg class (the class rax_RegI uses).
5379 operand rax_RegN()
5380 %{
5381 constraint(ALLOC_IN_RC(int_rax_reg));
5382 match(RegN);
5383 match(rRegN);
5384
5385 format %{ %}
5386 interface(REG_INTER);
5387 %}
5388
5389 // Used in AtomicAdd
// Pointer operand constrained to the ptr_rbx_reg class.
5390 operand rbx_RegP()
5391 %{
5392 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5393 match(RegP);
5394 match(rRegP);
5395
5396 format %{ %}
5397 interface(REG_INTER);
5398 %}
5399
// Pointer operand constrained to the ptr_rsi_reg class.
5400 operand rsi_RegP()
5401 %{
5402 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5403 match(RegP);
5404 match(rRegP);
5405
5406 format %{ %}
5407 interface(REG_INTER);
5408 %}
5409
// Pointer operand constrained to the ptr_rbp_reg class. See the Q&A comment
// after rRegN for why rbp_RegP is also a match for rRegP.
5410 operand rbp_RegP()
5411 %{
5412 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5413 match(RegP);
5414 match(rRegP);
5415
5416 format %{ %}
5417 interface(REG_INTER);
5418 %}
5419
5420 // Used in rep stosq
// Pointer operand constrained to the ptr_rdi_reg class.
5421 operand rdi_RegP()
5422 %{
5423 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5424 match(RegP);
5425 match(rRegP);
5426
5427 format %{ %}
5428 interface(REG_INTER);
5429 %}
5430
// Pointer operand constrained to the ptr_r15_reg class. The Q&A comment after
// rRegN describes r15 as the read-only TLS register.
5431 operand r15_RegP()
5432 %{
5433 constraint(ALLOC_IN_RC(ptr_r15_reg));
5434 match(RegP);
5435 match(rRegP);
5436
5437 format %{ %}
5438 interface(REG_INTER);
5439 %}
5440
// General long register operand allocated from the long_reg class. It also
// matches rax_RegL and rdx_RegL.
5441 operand rRegL()
5442 %{
5443 constraint(ALLOC_IN_RC(long_reg));
5444 match(RegL);
5445 match(rax_RegL);
5446 match(rdx_RegL);
5447
5448 format %{ %}
5449 interface(REG_INTER);
5450 %}
5451
5452 // Special Registers
// Long operand allocated from the long_no_rax_rdx_reg class.
5453 operand no_rax_rdx_RegL()
5454 %{
5455 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5456 match(RegL);
5457 match(rRegL);
5458
5459 format %{ %}
5460 interface(REG_INTER);
5461 %}
5462
// Long operand constrained to the long_rax_reg class; printed as "RAX".
5463 operand rax_RegL()
5464 %{
5465 constraint(ALLOC_IN_RC(long_rax_reg));
5466 match(RegL);
5467 match(rRegL);
5468
5469 format %{ "RAX" %}
5470 interface(REG_INTER);
5471 %}
5472
// Long operand constrained to the long_rcx_reg class.
5473 operand rcx_RegL()
5474 %{
5475 constraint(ALLOC_IN_RC(long_rcx_reg));
5476 match(RegL);
5477 match(rRegL);
5478
5479 format %{ %}
5480 interface(REG_INTER);
5481 %}
5482
// Long operand constrained to the long_rdx_reg class.
5483 operand rdx_RegL()
5484 %{
5485 constraint(ALLOC_IN_RC(long_rdx_reg));
5486 match(RegL);
5487 match(rRegL);
5488
5489 format %{ %}
5490 interface(REG_INTER);
5491 %}
5492
// Long operand constrained to the long_r11_reg class.
5493 operand r11_RegL()
5494 %{
5495 constraint(ALLOC_IN_RC(long_r11_reg));
5496 match(RegL);
5497 match(rRegL);
5498
5499 format %{ %}
5500 interface(REG_INTER);
5501 %}
5502
// Long operand allocated from the long_no_rbp_r13_reg class. It matches RegL,
// rRegL and the fixed-register long operands rax, rcx and rdx.
5503 operand no_rbp_r13_RegL()
5504 %{
5505 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5506 match(RegL);
5507 match(rRegL);
5508 match(rax_RegL);
5509 match(rcx_RegL);
5510 match(rdx_RegL);
5511
5512 format %{ %}
5513 interface(REG_INTER);
5514 %}
5515
5516 // Flags register, used as output of compare instructions
// Matches RegFlags, allocated from the int_flags class; printed as "RFLAGS".
5517 operand rFlagsReg()
5518 %{
5519 constraint(ALLOC_IN_RC(int_flags));
5520 match(RegFlags);
5521
5522 format %{ "RFLAGS" %}
5523 interface(REG_INTER);
5524 %}
5525
5526 // Flags register, used as output of FLOATING POINT compare instructions
// Same register class (int_flags) and match rule (RegFlags) as rFlagsReg; a
// distinct operand type printed as "RFLAGS_U".
// NOTE(review): the "_U" suffix suggests unsigned compares use this too -- confirm.
5527 operand rFlagsRegU()
5528 %{
5529 constraint(ALLOC_IN_RC(int_flags));
5530 match(RegFlags);
5531
5532 format %{ "RFLAGS_U" %}
5533 interface(REG_INTER);
5534 %}
5535
// Flags operand (int_flags class, matches RegFlags) that is available only
// when APX is disabled or AVX10.2 is not supported; its predicate is the
// exact complement of rFlagsRegUCFE's. Printed as "RFLAGS_U_CF".
5536 operand rFlagsRegUCF() %{
5537 constraint(ALLOC_IN_RC(int_flags));
5538 match(RegFlags);
5539 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5540
5541 format %{ "RFLAGS_U_CF" %}
5542 interface(REG_INTER);
5543 %}
5544
// Flags operand (int_flags class, matches RegFlags) that is available only
// when UseAPX is set and AVX10.2 is supported; its predicate is the exact
// complement of rFlagsRegUCF's. Printed as "RFLAGS_U_CFE".
5545 operand rFlagsRegUCFE() %{
5546 constraint(ALLOC_IN_RC(int_flags));
5547 match(RegFlags);
5548 predicate(UseAPX && VM_Version::supports_avx10_2());
5549
5550 format %{ "RFLAGS_U_CFE" %}
5551 interface(REG_INTER);
5552 %}
5553
5554 // Float register operands
// General float operand: matches RegF, allocated from the float_reg class.
5555 operand regF() %{
5556 constraint(ALLOC_IN_RC(float_reg));
5557 match(RegF);
5558
5559 format %{ %}
5560 interface(REG_INTER);
5561 %}
5562
5563 // Float register operand restricted to the float_reg_legacy class
5564 operand legRegF() %{
5565 constraint(ALLOC_IN_RC(float_reg_legacy));
5566 match(RegF);
5567
5568 format %{ %}
5569 interface(REG_INTER);
5570 %}
5571
5572 // Float register operand restricted to the float_reg_vl class
5573 operand vlRegF() %{
5574 constraint(ALLOC_IN_RC(float_reg_vl));
5575 match(RegF);
5576
5577 format %{ %}
5578 interface(REG_INTER);
5579 %}
5580
5581 // Double register operands
// General double operand: matches RegD, allocated from the double_reg class.
5582 operand regD() %{
5583 constraint(ALLOC_IN_RC(double_reg));
5584 match(RegD);
5585
5586 format %{ %}
5587 interface(REG_INTER);
5588 %}
5589
5590 // Double register operand restricted to the double_reg_legacy class
5591 operand legRegD() %{
5592 constraint(ALLOC_IN_RC(double_reg_legacy));
5593 match(RegD);
5594
5595 format %{ %}
5596 interface(REG_INTER);
5597 %}
5598
5599 // Double register operand restricted to the double_reg_vl class
5600 operand vlRegD() %{
5601 constraint(ALLOC_IN_RC(double_reg_vl));
5602 match(RegD);
5603
5604 format %{ %}
5605 interface(REG_INTER);
5606 %}
5607
5608 //----------Memory Operands----------------------------------------------------
5609 // Direct Memory Operand
5610 // operand direct(immP addr)
5611 // %{
5612 // match(addr);
5613
5614 // format %{ "[$addr]" %}
5615 // interface(MEMORY_INTER) %{
5616 // base(0xFFFFFFFF);
5617 // index(0x4);
5618 // scale(0x0);
5619 // disp($addr);
5620 // %}
5621 // %}
5622
5623 // Indirect Memory Operand
// Address is just the base register: [reg], with no index, scale or
// displacement. The input register may be any any_RegP.
5624 operand indirect(any_RegP reg)
5625 %{
5626 constraint(ALLOC_IN_RC(ptr_reg));
5627 match(reg);
5628
5629 format %{ "[$reg]" %}
5630 interface(MEMORY_INTER) %{
5631 base($reg);
5632 index(0x4); // No Index (same encoding the stack-slot operands annotate)
5633 scale(0x0);
5634 disp(0x0);
5635 %}
5636 %}
5637
5638 // Indirect Memory Plus Short Offset Operand
// Address is [reg + off] where off is an immL8 constant; no index register.
5639 operand indOffset8(any_RegP reg, immL8 off)
5640 %{
5641 constraint(ALLOC_IN_RC(ptr_reg));
5642 match(AddP reg off);
5643
5644 format %{ "[$reg + $off (8-bit)]" %}
5645 interface(MEMORY_INTER) %{
5646 base($reg);
5647 index(0x4); // No Index
5648 scale(0x0);
5649 disp($off);
5650 %}
5651 %}
5652
5653 // Indirect Memory Plus Long Offset Operand
// Address is [reg + off] where off is an immL32 constant; no index register.
5654 operand indOffset32(any_RegP reg, immL32 off)
5655 %{
5656 constraint(ALLOC_IN_RC(ptr_reg));
5657 match(AddP reg off);
5658
5659 format %{ "[$reg + $off (32-bit)]" %}
5660 interface(MEMORY_INTER) %{
5661 base($reg);
5662 index(0x4); // No Index
5663 scale(0x0);
5664 disp($off);
5665 %}
5666 %}
5667
5668 // Indirect Memory Plus Index Register Plus Offset Operand
// Address is [reg + lreg + off]: a long index register with scale 0 and an
// immL32 displacement.
5669 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5670 %{
5671 constraint(ALLOC_IN_RC(ptr_reg));
5672 match(AddP (AddP reg lreg) off);
5673
5674 op_cost(10);
5675 format %{"[$reg + $off + $lreg]" %}
5676 interface(MEMORY_INTER) %{
5677 base($reg);
5678 index($lreg);
5679 scale(0x0);
5680 disp($off);
5681 %}
5682 %}
5683
5684 // Indirect Memory Plus Index Register Operand
// Address is [reg + lreg]: a long index register with scale 0 and no
// displacement (disp is the constant 0x0).
5685 operand indIndex(any_RegP reg, rRegL lreg)
5686 %{
5687 constraint(ALLOC_IN_RC(ptr_reg));
5688 match(AddP reg lreg);
5689
5690 op_cost(10);
5691 format %{"[$reg + $lreg]" %}
5692 interface(MEMORY_INTER) %{
5693 base($reg);
5694 index($lreg);
5695 scale(0x0);
5696 disp(0x0);
5697 %}
5698 %}
5699
5700 // Indirect Memory Times Scale Plus Index Register
// Address is [reg + (lreg << scale)]: the shift amount of the matched LShiftL
// (an immI2 constant) becomes the scale field; no displacement.
5701 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5702 %{
5703 constraint(ALLOC_IN_RC(ptr_reg));
5704 match(AddP reg (LShiftL lreg scale));
5705
5706 op_cost(10);
5707 format %{"[$reg + $lreg << $scale]" %}
5708 interface(MEMORY_INTER) %{
5709 base($reg);
5710 index($lreg);
5711 scale($scale);
5712 disp(0x0);
5713 %}
5714 %}
5715
// Indirect Memory Times Scale Plus Positive Index Register
// Address is [reg + (idx << scale)] where idx is an int register that the
// ideal graph widens with ConvI2L. The predicate accepts the pattern only when
// the long type reached via n->in(3)->in(1) has a lower bound (_lo) >= 0;
// presumably that node is the ConvI2L of the match rule -- confirm.
5716 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5717 %{
5718 constraint(ALLOC_IN_RC(ptr_reg));
5719 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5720 match(AddP reg (LShiftL (ConvI2L idx) scale));
5721
5722 op_cost(10);
5723 format %{"[$reg + pos $idx << $scale]" %}
5724 interface(MEMORY_INTER) %{
5725 base($reg);
5726 index($idx);
5727 scale($scale);
5728 disp(0x0);
5729 %}
5730 %}
5731
5732 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Address is [reg + (lreg << scale) + off] with an immI2 scale and an immL32
// displacement.
5733 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5734 %{
5735 constraint(ALLOC_IN_RC(ptr_reg));
5736 match(AddP (AddP reg (LShiftL lreg scale)) off);
5737
5738 op_cost(10);
5739 format %{"[$reg + $off + $lreg << $scale]" %}
5740 interface(MEMORY_INTER) %{
5741 base($reg);
5742 index($lreg);
5743 scale($scale);
5744 disp($off);
5745 %}
5746 %}
5747
5748 // Indirect Memory Plus Positive Index Register Plus Offset Operand
// Address is [reg + idx + off] where idx is an int register widened by
// ConvI2L. The predicate accepts the pattern only when the long type reached
// via n->in(2)->in(3) has a lower bound (_lo) >= 0; presumably that node is
// the ConvI2L of the match rule -- confirm.
5749 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5750 %{
5751 constraint(ALLOC_IN_RC(ptr_reg));
5752 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5753 match(AddP (AddP reg (ConvI2L idx)) off);
5754
5755 op_cost(10);
5756 format %{"[$reg + $off + $idx]" %}
5757 interface(MEMORY_INTER) %{
5758 base($reg);
5759 index($idx);
5760 scale(0x0);
5761 disp($off);
5762 %}
5763 %}
5764
5765 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Address is [reg + (idx << scale) + off] where idx is an int register widened
// by ConvI2L. The predicate accepts the pattern only when the long type
// reached via n->in(2)->in(3)->in(1) has a lower bound (_lo) >= 0; presumably
// that node is the ConvI2L of the match rule -- confirm.
5766 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5767 %{
5768 constraint(ALLOC_IN_RC(ptr_reg));
5769 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5770 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5771
5772 op_cost(10);
5773 format %{"[$reg + $off + $idx << $scale]" %}
5774 interface(MEMORY_INTER) %{
5775 base($reg);
5776 index($idx);
5777 scale($scale);
5778 disp($off);
5779 %}
5780 %}
5781
5782 // Indirect Narrow Oop Plus Offset Operand
5783 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5784 // we can't free r12 even with CompressedOops::base() == nullptr.
// Folds the DecodeN into the address: R12 is the fixed base, the narrow
// register is the index with scale 3, and off is the displacement. It applies
// only when UseCompressedOops is set and the compressed-oop shift equals
// Address::times_8.
5785 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5786 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5787 constraint(ALLOC_IN_RC(ptr_reg));
5788 match(AddP (DecodeN reg) off);
5789
5790 op_cost(10);
5791 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5792 interface(MEMORY_INTER) %{
5793 base(0xc); // R12
5794 index($reg);
5795 scale(0x3);
5796 disp($off);
5797 %}
5798 %}
5799
5800 // Indirect Memory Operand (narrow oop base)
// Uses the narrow register directly as the base of [reg], absorbing the
// DecodeN. It applies only when CompressedOops::shift() == 0.
5801 operand indirectNarrow(rRegN reg)
5802 %{
5803 predicate(CompressedOops::shift() == 0);
5804 constraint(ALLOC_IN_RC(ptr_reg));
5805 match(DecodeN reg);
5806
5807 format %{ "[$reg]" %}
5808 interface(MEMORY_INTER) %{
5809 base($reg);
5810 index(0x4); // No Index
5811 scale(0x0);
5812 disp(0x0);
5813 %}
5814 %}
5815
5816 // Indirect Memory Plus Short Offset Operand (narrow oop base)
// Address is [reg + off] with an immL8 displacement, absorbing the DecodeN of
// the base. It applies only when CompressedOops::shift() == 0.
5817 operand indOffset8Narrow(rRegN reg, immL8 off)
5818 %{
5819 predicate(CompressedOops::shift() == 0);
5820 constraint(ALLOC_IN_RC(ptr_reg));
5821 match(AddP (DecodeN reg) off);
5822
5823 format %{ "[$reg + $off (8-bit)]" %}
5824 interface(MEMORY_INTER) %{
5825 base($reg);
5826 index(0x4); // No Index
5827 scale(0x0);
5828 disp($off);
5829 %}
5830 %}
5831
5832 // Indirect Memory Plus Long Offset Operand (narrow oop base)
// Address is [reg + off] with an immL32 displacement, absorbing the DecodeN of
// the base. It applies only when CompressedOops::shift() == 0.
5833 operand indOffset32Narrow(rRegN reg, immL32 off)
5834 %{
5835 predicate(CompressedOops::shift() == 0);
5836 constraint(ALLOC_IN_RC(ptr_reg));
5837 match(AddP (DecodeN reg) off);
5838
5839 format %{ "[$reg + $off (32-bit)]" %}
5840 interface(MEMORY_INTER) %{
5841 base($reg);
5842 index(0x4); // No Index
5843 scale(0x0);
5844 disp($off);
5845 %}
5846 %}
5847
5848 // Indirect Memory Plus Index Register Plus Offset Operand (narrow oop base)
// Address is [reg + lreg + off], absorbing the DecodeN of the base. It applies
// only when CompressedOops::shift() == 0.
5849 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5850 %{
5851 predicate(CompressedOops::shift() == 0);
5852 constraint(ALLOC_IN_RC(ptr_reg));
5853 match(AddP (AddP (DecodeN reg) lreg) off);
5854
5855 op_cost(10);
5856 format %{"[$reg + $off + $lreg]" %}
5857 interface(MEMORY_INTER) %{
5858 base($reg);
5859 index($lreg);
5860 scale(0x0);
5861 disp($off);
5862 %}
5863 %}
5864
5865 // Indirect Memory Plus Index Register Operand (narrow oop base)
// Address is [reg + lreg] with no displacement, absorbing the DecodeN of the
// base. It applies only when CompressedOops::shift() == 0.
5866 operand indIndexNarrow(rRegN reg, rRegL lreg)
5867 %{
5868 predicate(CompressedOops::shift() == 0);
5869 constraint(ALLOC_IN_RC(ptr_reg));
5870 match(AddP (DecodeN reg) lreg);
5871
5872 op_cost(10);
5873 format %{"[$reg + $lreg]" %}
5874 interface(MEMORY_INTER) %{
5875 base($reg);
5876 index($lreg);
5877 scale(0x0);
5878 disp(0x0);
5879 %}
5880 %}
5881
5882 // Indirect Memory Times Scale Plus Index Register (narrow oop base)
// Address is [reg + (lreg << scale)], absorbing the DecodeN of the base. It
// applies only when CompressedOops::shift() == 0.
5883 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5884 %{
5885 predicate(CompressedOops::shift() == 0);
5886 constraint(ALLOC_IN_RC(ptr_reg));
5887 match(AddP (DecodeN reg) (LShiftL lreg scale));
5888
5889 op_cost(10);
5890 format %{"[$reg + $lreg << $scale]" %}
5891 interface(MEMORY_INTER) %{
5892 base($reg);
5893 index($lreg);
5894 scale($scale);
5895 disp(0x0);
5896 %}
5897 %}
5898
5899 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (narrow oop base)
// Address is [reg + (lreg << scale) + off], absorbing the DecodeN of the base.
// It applies only when CompressedOops::shift() == 0.
5900 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5901 %{
5902 predicate(CompressedOops::shift() == 0);
5903 constraint(ALLOC_IN_RC(ptr_reg));
5904 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5905
5906 op_cost(10);
5907 format %{"[$reg + $off + $lreg << $scale]" %}
5908 interface(MEMORY_INTER) %{
5909 base($reg);
5910 index($lreg);
5911 scale($scale);
5912 disp($off);
5913 %}
5914 %}
5915
5916 // Indirect Memory Plus Positive Index Register Plus Offset Operand (narrow oop base)
// Address is [reg + idx + off] where idx is an int register widened by
// ConvI2L. It requires CompressedOops::shift() == 0 and a long type with
// _lo >= 0 at n->in(2)->in(3); presumably that node is the ConvI2L -- confirm.
5917 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5918 %{
5919 constraint(ALLOC_IN_RC(ptr_reg));
5920 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5921 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5922
5923 op_cost(10);
5924 format %{"[$reg + $off + $idx]" %}
5925 interface(MEMORY_INTER) %{
5926 base($reg);
5927 index($idx);
5928 scale(0x0);
5929 disp($off);
5930 %}
5931 %}
5932
5933 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow oop base)
// Address is [reg + (idx << scale) + off] where idx is an int register widened
// by ConvI2L. It requires CompressedOops::shift() == 0 and a long type with
// _lo >= 0 at n->in(2)->in(3)->in(1); presumably that node is the ConvI2L --
// confirm.
5934 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5935 %{
5936 constraint(ALLOC_IN_RC(ptr_reg));
5937 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5938 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5939
5940 op_cost(10);
5941 format %{"[$reg + $off + $idx << $scale]" %}
5942 interface(MEMORY_INTER) %{
5943 base($reg);
5944 index($idx);
5945 scale($scale);
5946 disp($off);
5947 %}
5948 %}
5949
5950 //----------Special Memory Operands--------------------------------------------
5951 // Stack Slot Operand - This operand is used for loading and storing temporary
5952 // values on the stack where a match requires a value to
5953 // flow through memory.
// Pointer-typed stack slot: addressed as [RSP + disp], where the displacement
// comes from the sRegP operand.
5954 operand stackSlotP(sRegP reg)
5955 %{
5956 constraint(ALLOC_IN_RC(stack_slots));
5957 // No match rule because this operand is only generated in matching
5958
5959 format %{ "[$reg]" %}
5960 interface(MEMORY_INTER) %{
5961 base(0x4); // RSP
5962 index(0x4); // No Index
5963 scale(0x0); // No Scale
5964 disp($reg); // Stack Offset
5965 %}
5966 %}
5967
// Int-typed stack slot: addressed as [RSP + disp], where the displacement
// comes from the sRegI operand.
5968 operand stackSlotI(sRegI reg)
5969 %{
5970 constraint(ALLOC_IN_RC(stack_slots));
5971 // No match rule because this operand is only generated in matching
5972
5973 format %{ "[$reg]" %}
5974 interface(MEMORY_INTER) %{
5975 base(0x4); // RSP
5976 index(0x4); // No Index
5977 scale(0x0); // No Scale
5978 disp($reg); // Stack Offset
5979 %}
5980 %}
5981
// Float-typed stack slot: addressed as [RSP + disp], where the displacement
// comes from the sRegF operand.
5982 operand stackSlotF(sRegF reg)
5983 %{
5984 constraint(ALLOC_IN_RC(stack_slots));
5985 // No match rule because this operand is only generated in matching
5986
5987 format %{ "[$reg]" %}
5988 interface(MEMORY_INTER) %{
5989 base(0x4); // RSP
5990 index(0x4); // No Index
5991 scale(0x0); // No Scale
5992 disp($reg); // Stack Offset
5993 %}
5994 %}
5995
// Double-typed stack slot: addressed as [RSP + disp], where the displacement
// comes from the sRegD operand.
5996 operand stackSlotD(sRegD reg)
5997 %{
5998 constraint(ALLOC_IN_RC(stack_slots));
5999 // No match rule because this operand is only generated in matching
6000
6001 format %{ "[$reg]" %}
6002 interface(MEMORY_INTER) %{
6003 base(0x4); // RSP
6004 index(0x4); // No Index
6005 scale(0x0); // No Scale
6006 disp($reg); // Stack Offset
6007 %}
6008 %}

// Long-typed stack slot: addressed as [RSP + disp], where the displacement
// comes from the sRegL operand.
6009 operand stackSlotL(sRegL reg)
6010 %{
6011 constraint(ALLOC_IN_RC(stack_slots));
6012 // No match rule because this operand is only generated in matching
6013
6014 format %{ "[$reg]" %}
6015 interface(MEMORY_INTER) %{
6016 base(0x4); // RSP
6017 index(0x4); // No Index
6018 scale(0x0); // No Scale
6019 disp($reg); // Stack Offset
6020 %}
6021 %}
6022
6023 //----------Conditional Branch Operands----------------------------------------
6024 // Comparison Op - This is the operation of the comparison, and is limited to
6025 // the following set of codes:
6026 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6027 //
6028 // Other attributes of the comparison, such as unsignedness, are specified
6029 // by the comparison instruction that sets a condition code flags register.
6030 // That result is represented by a flags operand whose subtype is appropriate
6031 // to the unsignedness (etc.) of the comparison.
6032 //
6033 // Later, the instruction which matches both the Comparison Op (a Bool) and
6034 // the flags (produced by the Cmp) specifies the coding of the comparison op
6035 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6036
6037 // Comparison Code
// Signed comparison: maps each Bool test to a condition-code value and the
// mnemonic suffix used when printing (e, ne, l, ge, le, g, o, no).
6038 operand cmpOp()
6039 %{
6040 match(Bool);
6041
6042 format %{ "" %}
6043 interface(COND_INTER) %{
6044 equal(0x4, "e");
6045 not_equal(0x5, "ne");
6046 less(0xc, "l");
6047 greater_equal(0xd, "ge");
6048 less_equal(0xe, "le");
6049 greater(0xf, "g");
6050 overflow(0x0, "o");
6051 no_overflow(0x1, "no");
6052 %}
6053 %}
6054
6055 // Comparison Code, unsigned compare. Used by FP also, with
6056 // C2 (unordered) turned into GT or LT already. The other bits
6057 // C0 and C3 are turned into Carry & Zero flags.
// Differs from cmpOp only in the four ordering tests, which use the
// below/above codes (b, ae, be, a) instead of l, ge, le, g.
6058 operand cmpOpU()
6059 %{
6060 match(Bool);
6061
6062 format %{ "" %}
6063 interface(COND_INTER) %{
6064 equal(0x4, "e");
6065 not_equal(0x5, "ne");
6066 less(0x2, "b");
6067 greater_equal(0x3, "ae");
6068 less_equal(0x6, "be");
6069 greater(0x7, "a");
6070 overflow(0x0, "o");
6071 no_overflow(0x1, "no");
6072 %}
6073 %}
6074
6075
6076 // Floating comparisons that don't require any fixup for the unordered case,
6077 // If both inputs of the comparison are the same, ZF is always set so we
6078 // don't need to use cmpOpUCF2 for eq/ne
// Selected only when APX is disabled or AVX10.2 is unsupported, and the test
// is lt/ge/le/gt or both inputs of the compare node (n->in(1)) are the same
// node. Here equal/not_equal map to the parity codes np/p, not e/ne.
6079 operand cmpOpUCF() %{
6080 match(Bool);
6081 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6082 (n->as_Bool()->_test._test == BoolTest::lt ||
6083 n->as_Bool()->_test._test == BoolTest::ge ||
6084 n->as_Bool()->_test._test == BoolTest::le ||
6085 n->as_Bool()->_test._test == BoolTest::gt ||
6086 n->in(1)->in(1) == n->in(1)->in(2)));
6087 format %{ "" %}
6088 interface(COND_INTER) %{
6089 equal(0xb, "np");
6090 not_equal(0xa, "p");
6091 less(0x2, "b");
6092 greater_equal(0x3, "ae");
6093 less_equal(0x6, "be");
6094 greater(0x7, "a");
6095 overflow(0x0, "o");
6096 no_overflow(0x1, "no");
6097 %}
6098 %}
6099
6100
6101 // Floating comparisons that can be fixed up with extra conditional jumps
// Selected only when APX is disabled or AVX10.2 is unsupported, the test is
// eq or ne, and the two inputs of the compare node (n->in(1)) are different
// nodes. This is the case cmpOpUCF's predicate leaves out.
6102 operand cmpOpUCF2() %{
6103 match(Bool);
6104 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6105 (n->as_Bool()->_test._test == BoolTest::ne ||
6106 n->as_Bool()->_test._test == BoolTest::eq) &&
6107 n->in(1)->in(1) != n->in(1)->in(2));
6108 format %{ "" %}
6109 interface(COND_INTER) %{
6110 equal(0x4, "e");
6111 not_equal(0x5, "ne");
6112 less(0x2, "b");
6113 greater_equal(0x3, "ae");
6114 less_equal(0x6, "be");
6115 greater(0x7, "a");
6116 overflow(0x0, "o");
6117 no_overflow(0x1, "no");
6118 %}
6119 %}
6120
6121
6122 // Floating point comparisons that set condition flags to test more directly,
6123 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6124 // are used for L (<) and LE (<=) conditions. It's important to convert these
6125 // latter conditions to ones that use unsigned tests before passing into an
6126 // instruction because the preceding comparison might be based on a three way
6127 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
// Selected only when UseAPX is set and AVX10.2 is supported, for the six tests
// ne/eq/lt/ge/le/gt. The condition codes declared here are identical to
// cmpOpU's (b, ae, be, a for the ordering tests).
// NOTE(review): the paragraph above mentions signed tests for L/LE, yet this
// table lists the below/below-or-equal codes -- confirm the text is current.
6128 operand cmpOpUCFE()
6129 %{
6130 match(Bool);
6131 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6132 (n->as_Bool()->_test._test == BoolTest::ne ||
6133 n->as_Bool()->_test._test == BoolTest::eq ||
6134 n->as_Bool()->_test._test == BoolTest::lt ||
6135 n->as_Bool()->_test._test == BoolTest::ge ||
6136 n->as_Bool()->_test._test == BoolTest::le ||
6137 n->as_Bool()->_test._test == BoolTest::gt));
6138
6139 format %{ "" %}
6140 interface(COND_INTER) %{
6141 equal(0x4, "e");
6142 not_equal(0x5, "ne");
6143 less(0x2, "b");
6144 greater_equal(0x3, "ae");
6145 less_equal(0x6, "be");
6146 greater(0x7, "a");
6147 overflow(0x0, "o");
6148 no_overflow(0x1, "no");
6149 %}
6150 %}
6151
6152 // Operands for bound floating point register arguments
// Matches VecX and is constrained to the xmm0_reg class.
6153 operand rxmm0() %{
6154 constraint(ALLOC_IN_RC(xmm0_reg));
6155 match(VecX);
6156 format%{%}
6157 interface(REG_INTER);
6158 %}
6159
6160 // Vectors
6161
6162 // Dummy generic vector class. Should be used for all vector operands.
6163 // Replaced with vec[SDXYZ] during post-selection pass.
// Matches every vector ideal type (VecS, VecD, VecX, VecY, VecZ) and names the
// "dynamic" register class.
6164 operand vec() %{
6165 constraint(ALLOC_IN_RC(dynamic));
6166 match(VecX);
6167 match(VecY);
6168 match(VecZ);
6169 match(VecS);
6170 match(VecD);
6171
6172 format %{ %}
6173 interface(REG_INTER);
6174 %}
6175
6176 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6177 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6178 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6179 // runtime code generation via reg_class_dynamic.
// Matches every vector ideal type (VecS, VecD, VecX, VecY, VecZ). Like vec,
// the constraint below names the "dynamic" register class.
6180 operand legVec() %{
6181 constraint(ALLOC_IN_RC(dynamic));
6182 match(VecX);
6183 match(VecY);
6184 match(VecZ);
6185 match(VecS);
6186 match(VecD);
6187
6188 format %{ %}
6189 interface(REG_INTER);
6190 %}
6191
6192 // Replaces vec during post-selection cleanup. See above.
// Matches VecS; allocated from the vectors_reg_vlbwdq class.
6193 operand vecS() %{
6194 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6195 match(VecS);
6196
6197 format %{ %}
6198 interface(REG_INTER);
6199 %}
6200
6201 // Replaces legVec during post-selection cleanup. See above.
// Matches VecS; allocated from the vectors_reg_legacy class.
6202 operand legVecS() %{
6203 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6204 match(VecS);
6205
6206 format %{ %}
6207 interface(REG_INTER);
6208 %}
6209
6210 // Replaces vec during post-selection cleanup. See above.
// Matches VecD; allocated from the vectord_reg_vlbwdq class.
6211 operand vecD() %{
6212 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6213 match(VecD);
6214
6215 format %{ %}
6216 interface(REG_INTER);
6217 %}
6218
6219 // Replaces legVec during post-selection cleanup. See above.
// Matches VecD; allocated from the vectord_reg_legacy class.
6220 operand legVecD() %{
6221 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6222 match(VecD);
6223
6224 format %{ %}
6225 interface(REG_INTER);
6226 %}
6227
6228 // Replaces vec during post-selection cleanup. See above.
// Matches VecX; allocated from the vectorx_reg_vlbwdq class.
6229 operand vecX() %{
6230 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6231 match(VecX);
6232
6233 format %{ %}
6234 interface(REG_INTER);
6235 %}
6236
6237 // Replaces legVec during post-selection cleanup. See above.
// Matches VecX; allocated from the vectorx_reg_legacy class.
6238 operand legVecX() %{
6239 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6240 match(VecX);
6241
6242 format %{ %}
6243 interface(REG_INTER);
6244 %}
6245
6246 // Replaces vec during post-selection cleanup. See above.
// Matches VecY; allocated from the vectory_reg_vlbwdq class.
6247 operand vecY() %{
6248 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6249 match(VecY);
6250
6251 format %{ %}
6252 interface(REG_INTER);
6253 %}
6254
6255 // Replaces legVec during post-selection cleanup. See above.
// Matches VecY; allocated from the vectory_reg_legacy class.
6256 operand legVecY() %{
6257 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6258 match(VecY);
6259
6260 format %{ %}
6261 interface(REG_INTER);
6262 %}
6263
6264 // Replaces vec during post-selection cleanup. See above.
// Matches VecZ; allocated from the vectorz_reg class (unlike the narrower vec*
// operands, the class name carries no _vlbwdq suffix).
6265 operand vecZ() %{
6266 constraint(ALLOC_IN_RC(vectorz_reg));
6267 match(VecZ);
6268
6269 format %{ %}
6270 interface(REG_INTER);
6271 %}
6272
6273 // Replaces legVec during post-selection cleanup. See above.
// Matches VecZ; allocated from the vectorz_reg_legacy class.
6274 operand legVecZ() %{
6275 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6276 match(VecZ);
6277
6278 format %{ %}
6279 interface(REG_INTER);
6280 %}
6281
6282 //----------OPERAND CLASSES----------------------------------------------------
6283 // Operand Classes are groups of operands that are used to simplify
6284 // instruction definitions by not requiring the AD writer to specify separate
6285 // instructions for every form of operand when the instruction accepts
6286 // multiple operand types with the same basic encoding and format. The classic
6287 // case of this is memory operands.
6288
// "memory" groups every register-based addressing operand defined above: the
// ten any_RegP-based forms, indCompressedOopOffset, and the nine *Narrow
// forms. The stackSlot* operands are not members.
6289 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6290 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6291 indCompressedOopOffset,
6292 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6293 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6294 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6295
6296 //----------PIPELINE-----------------------------------------------------------
6297 // Rules which define the behavior of the target architecture's pipeline.
6298 pipeline %{
6299
6300 //----------ATTRIBUTES---------------------------------------------------------
// Global pipeline attributes: instruction sizing, bundle width and fetch
// geometry.
6301 attributes %{
6302 variable_size_instructions; // Instructions are variable size
6303 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6304 instruction_unit_size = 1; // Instruction size unit is 1 byte
6305 instruction_fetch_unit_size = 16; // The processor fetches one line
6306 instruction_fetch_units = 1; // of 16 bytes
6307 %}
6308
6309 //----------RESOURCES----------------------------------------------------------
6310 // Resources are the functional units available to the machine
6311
6312 // Generic P2/P3 pipeline
6313 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6314 // 3 instructions decoded per cycle.
6315 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6316 // 3 ALU op, only ALU0 handles mul instructions.
// DECODE, MEM and ALU are masks that combine the individual units, so a pipe
// class can ask for "any decoder", "any memory unit" or "any ALU".
// NOTE(review): the text above says 2 load/store ops per cycle, but three
// memory units (MS0, MS1, MS2) are declared -- confirm which is intended.
6317 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6318 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6319 BR, FPU,
6320 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6321
6322 //----------PIPELINE DESCRIPTION-----------------------------------------------
6323 // Pipeline Description specifies the stages in the machine's pipeline
6324
6325 // Generic P2/P3 pipeline
// Six stages, S0 through S5. The pipe classes below place decoder use in S0
// and operand reads/writes in S3 through S5.
6326 pipe_desc(S0, S1, S2, S3, S4, S5);
6327
6328 //----------PIPELINE CLASSES---------------------------------------------------
6329 // Pipeline Classes describe the stages in which input and output are
6330 // referenced by the hardware pipeline.
6331
6332 // Naming convention: ialu or fpu
6333 // Then: _reg
6334 // Then: _reg if there is a 2nd register
6335 // Then: _long if it's a pair of instructions implementing a long
6336 // Then: _fat if it requires the big decoder
6337 // Or: _mem if it requires the big decoder and a memory unit.
6338
6339 // Integer ALU reg operation
// Single instruction that reads dst in S3 and writes it in S4, using any
// decoder in S0 and any ALU in S3.
6340 pipe_class ialu_reg(rRegI dst)
6341 %{
6342 single_instruction;
6343 dst : S4(write);
6344 dst : S3(read);
6345 DECODE : S0; // any decoder
6346 ALU : S3; // any alu
6347 %}
6348
6349 // Long ALU reg operation
// Declared as two instructions (instruction_count(2)); dst is read in S3 and
// written in S4.
// NOTE(review): "both alus" below predates the three-ALU resource list
// (ALU0..ALU2) -- confirm the wording.
6350 pipe_class ialu_reg_long(rRegL dst)
6351 %{
6352 instruction_count(2);
6353 dst : S4(write);
6354 dst : S3(read);
6355 DECODE : S0(2); // any 2 decoders
6356 ALU : S3(2); // both alus
6357 %}
6358
6359 // Integer ALU reg operation using big decoder
// Same operand timing as ialu_reg, but decoding is restricted to D0.
6360 pipe_class ialu_reg_fat(rRegI dst)
6361 %{
6362 single_instruction;
6363 dst : S4(write);
6364 dst : S3(read);
6365 D0 : S0; // big decoder only
6366 ALU : S3; // any alu
6367 %}
6368
6369 // Integer ALU reg-reg operation
// Single instruction that reads src in S3 and writes dst in S4, using any
// decoder in S0 and any ALU in S3.
6370 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6371 %{
6372 single_instruction;
6373 dst : S4(write);
6374 src : S3(read);
6375 DECODE : S0; // any decoder
6376 ALU : S3; // any alu
6377 %}
6378
6379 // Integer ALU reg-reg operation using big decoder
// Same timing as ialu_reg_reg, but decoding is restricted to D0.
// NOTE(review): despite the "reg_reg" name, src is declared as a memory
// operand and no MEM resource is claimed -- confirm this is intended.
6380 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6381 %{
6382 single_instruction;
6383 dst : S4(write);
6384 src : S3(read);
6385 D0 : S0; // big decoder only
6386 ALU : S3; // any alu
6387 %}
6388
6389 // Integer ALU reg-mem operation
// Reads mem in S3 on any memory unit, uses any ALU one stage later (S4), and
// writes dst in S5. Decoding is restricted to D0.
6390 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6391 %{
6392 single_instruction;
6393 dst : S5(write);
6394 mem : S3(read);
6395 D0 : S0; // big decoder only
6396 ALU : S4; // any alu
6397 MEM : S3; // any mem
6398 %}
6399
6400 // Integer mem operation (prefetch)
// Only a memory read in S3 on any memory unit; no ALU resource and no
// register operand is declared. Decoding is restricted to D0.
6401 pipe_class ialu_mem(memory mem)
6402 %{
6403 single_instruction;
6404 mem : S3(read);
6405 D0 : S0; // big decoder only
6406 MEM : S3; // any mem
6407 %}
6408
6409 // Integer Store to Memory
// Register source: mem is read in S3 and src in S5. Uses D0, any ALU in S4 and
// any memory unit in S3.
6410 pipe_class ialu_mem_reg(memory mem, rRegI src)
6411 %{
6412 single_instruction;
6413 mem : S3(read);
6414 src : S5(read);
6415 D0 : S0; // big decoder only
6416 ALU : S4; // any alu
6417 MEM : S3;
6418 %}
6419
6420 // // Long Store to Memory
6421 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6422 // %{
6423 // instruction_count(2);
6424 // mem : S3(read);
6425 // src : S5(read);
6426 // D0 : S0(2); // big decoder only; twice
6427 // ALU : S4(2); // any 2 alus
6428 // MEM : S3(2); // Both mems
6429 // %}
6430
// Pipeline class: integer store to memory with no register source
// operand (the "_imm" flavour of ialu_mem_reg).
pipe_class ialu_mem_imm(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;          // big decoder only
    ALU    : S4;          // any alu
    MEM    : S3;
%}
6440
// Pipeline class: integer register-register operation restricted to ALU0
// and to the big decoder.
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;          // big decoder only
    ALU0   : S3;          // only alu0
%}
6450
// Pipeline class: integer register-memory operation restricted to ALU0.
// Same stage layout as ialu_reg_mem, with ALU0 in place of any ALU.
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;          // big decoder only
    ALU0   : S4;          // alu0 only
    MEM    : S3;          // any mem
%}
6461
// Pipeline class: integer register-register operation whose result is the
// flags register: cr is written at S4, both sources are read at S3.
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;          // any decoder
    ALU    : S3;          // any alu
%}
6472
// Pipeline class: integer register-immediate operation that writes the
// flags register (cr at S4) from a single register source (read at S3).
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;          // any decoder
    ALU    : S3;          // any alu
%}
6482
// Pipeline class: integer register-memory operation that writes the flags
// register. Uses the big decoder, any ALU (S4) and a memory unit (S3).
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;          // big decoder only
    ALU    : S4;          // any alu
    MEM    : S3;
%}
6494
// Pipeline class: four-instruction register sequence reading p, q and y.
// NOTE(review): presumably the "compare less-than" idiom, going by the
// class name -- confirm against the instructs that reference it.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);       // any decoder
%}
6504
// Pipeline class: conditional move, register to register.
// Reads src and the flags at S3 and writes dst at S4; only a decoder
// resource is claimed.
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;          // any decoder
%}
6514
// Pipeline class: conditional move, memory to register.
// Like pipe_cmov_reg, plus a memory unit at S3 for the source operand.
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;          // any decoder
    MEM    : S3;
%}
6525
// Pipeline class: conditional move between long registers.
// Declared as a single instruction, but with a doubled decoder usage.
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);       // any 2 decoders
%}
6535
// Pipeline class: floating-point operation on a single register.
// Two instructions; dst is only declared as read (at S3).
pipe_class fpu_reg(regD dst)
%{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);       // any 2 decoders
    FPU    : S3;
%}
6544
// Pipeline class: floating-point register-register operation.
// Two instructions; src is read at S3, dst is written at S4.
pipe_class fpu_reg_reg(regD dst, regD src)
%{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);       // any 2 decoders
    FPU    : S3;
%}
6554
// Pipeline class: floating-point operation with two register sources.
// Three instructions; both sources are read at S3, dst is written at S4.
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);       // any 3 decoders
    FPU    : S3(2);
%}
6565
// Pipeline class: floating-point operation with three register sources.
// Four instructions; all sources are read at S3, dst is written at S4.
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);       // any 4 decoders
    FPU    : S3(2);
%}
6577
// Pipeline class: floating-point operation with one memory source and two
// register sources. Four instructions: the big decoder is used at S0 and
// the generic decoders at S1; a memory unit is claimed at S3.
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);       // any 3 decoders
    D0     : S0;          // big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}
6591
// Pipeline class: floating-point register-memory operation.
// Two instructions; mem is read at S3, the FPU is used at S4 and dst is
// written at S5.
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;          // big decoder only
    DECODE : S1;          // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;          // any mem
%}
6603
// Pipeline class: floating-point operation with one register source and
// one memory source. Three instructions; dst is written at S5.
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;          // big decoder only
    DECODE : S1(2);       // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;          // any mem
%}
6616
// Pipeline class: floating-point memory-register operation (register to
// memory). Two instructions; src is read at S5 and mem at S3.
pipe_class fpu_mem_reg(memory mem, regD src)
%{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;          // any decoder for FPU PUSH
    D0     : S1;          // big decoder only
    FPU    : S4;
    MEM    : S3;          // any mem
%}
6628
// Pipeline class: floating-point operation with a memory operand and two
// register sources. Three instructions; every operand is read at S3.
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);       // any decoder for FPU PUSH
    D0     : S1;          // big decoder only
    FPU    : S4;
    MEM    : S3;          // any mem
%}
6640
// Pipeline class: floating-point operation with a memory operand, a
// register source and a memory source. Three instructions; the memory
// unit usage is doubled.
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;          // any decoder for FPU PUSH
    D0     : S0(2);       // big decoder only
    FPU    : S4;
    MEM    : S3(2);       // any mem
%}
6652
// Pipeline class: floating-point memory-to-memory operation.
// Two instructions; no FPU resource is claimed, only the big decoder and
// the memory unit (each with a count of 2).
pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);       // big decoder only
    MEM    : S3(2);       // any mem
%}
6661
// Pipeline class: floating-point operation whose destination and both
// sources are memory operands. Three instructions; decoder and memory
// unit usage are tripled.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);       // big decoder only
    FPU    : S4;
    MEM    : S3(3);       // any mem
%}
6672
// Pipeline class: floating-point operation with a memory operand and a
// register source (the "_con" flavour). Three instructions; both operands
// are read at S4.
pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;          // any decoder for FPU PUSH
    D0     : S0(2);       // big decoder only
    FPU    : S4;
    MEM    : S3(2);       // any mem
%}
6683
// Pipeline class: floating-point constant load into a register.
// Two instructions; dst is written at S5.
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;          // big decoder only for the load
    DECODE : S1;          // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;          // any mem
%}
6694
// Pipeline class: floating-point constant load combined with a register
// source. Three instructions; src is read at S3, dst is written at S5.
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;          // big decoder only for the load
    DECODE : S1(2);       // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;          // any mem
%}
6706
// Pipeline class: unconditional branch.
// One instruction that claims only the branch unit, at S3.
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR : S3;
%}
6713
// Pipeline class: conditional branch.
// Reads the flags register at S1 and claims the branch unit at S3.
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr : S1(read);
    BR : S3;
%}
6721
// Pipeline class: allocation idiom (compare-and-exchange).
// Counted as one instruction, forces serialization and has a fixed
// latency of 6.
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1);
    force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}
6735
// Pipeline class: generic big/slow expanded idiom.
// Ten instructions spanning multiple bundles, serializing, with a fixed
// latency of 100.
pipe_class pipe_slow()
%{
    instruction_count(10);
    multiple_bundles;
    force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}
6744
// Pipeline class: the do-nothing class.
// Zero instructions and no operand or resource usage.
pipe_class empty()
%{
    instruction_count(0);
%}
6750
// Bind the Nop node (MachNop) to the 'empty' pipeline class.
define
%{
    MachNop = empty;
%}
6756
6757 %}
6758
6759 //----------INSTRUCTIONS-------------------------------------------------------
6760 //
6761 // match -- States which machine-independent subtree may be replaced
6762 // by this instruction.
6763 // ins_cost -- The estimated cost of this instruction is used by instruction
6764 // selection to identify a minimum cost tree of machine
6765 // instructions that matches a tree of machine-independent
6766 // instructions.
6767 // format -- A string providing the disassembly for this instruction.
6768 // The value of an instruction's operand may be inserted
6769 // by referring to it with a '$' prefix.
6770 // opcode -- Three instruction opcodes may be provided. These are referred
6771 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6773 // indicate the type of machine instruction, while secondary
6774 // and tertiary are often used for prefix options or addressing
6775 // modes.
6776 // ins_encode -- A list of encode classes with parameters. The encode class
6777 // name must have been defined in an 'enc_class' specification
6778 // in the encode section of the architecture description.
6779
6780 // ============================================================================
6781
// Matches the ideal Halt node. When the node is reachable, emits a 'stop'
// carrying the halt reason string; otherwise emits nothing.
instruct ShouldNotReachHere()
%{
  match(Halt);

  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      __ stop(__ code_string(_halt_reason));
    }
  %}
  ins_pipe(pipe_slow);
%}
6793
6794 // ============================================================================
6795
6796 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Dummy float move: regF -> vlRegF.
// The encoding must never run; it calls ShouldNotReachHere().
instruct MoveF2VL(vlRegF dst, regF src)
%{
  match(Set dst src);

  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6806
// Dummy float move: regF -> legRegF.
// The encoding must never run; it calls ShouldNotReachHere().
instruct MoveF2LEG(legRegF dst, regF src)
%{
  match(Set dst src);

  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6816
// Dummy float move: vlRegF -> regF.
// The encoding must never run; it calls ShouldNotReachHere().
instruct MoveVL2F(regF dst, vlRegF src)
%{
  match(Set dst src);

  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6826
// Dummy float move: legRegF -> regF.
// The encoding must never run; it calls ShouldNotReachHere().
instruct MoveLEG2F(regF dst, legRegF src)
%{
  match(Set dst src);

  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6836
// Dummy double move: regD -> vlRegD.
// The encoding must never run; it calls ShouldNotReachHere().
instruct MoveD2VL(vlRegD dst, regD src)
%{
  match(Set dst src);

  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6846
// Dummy double move: regD -> legRegD.
// The encoding must never run; it calls ShouldNotReachHere().
instruct MoveD2LEG(legRegD dst, regD src)
%{
  match(Set dst src);

  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6856
// Dummy double move: vlRegD -> regD.
// The encoding must never run; it calls ShouldNotReachHere().
instruct MoveVL2D(regD dst, vlRegD src)
%{
  match(Set dst src);

  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6866
// Dummy double move: legRegD -> regD.
// The encoding must never run; it calls ShouldNotReachHere().
instruct MoveLEG2D(regD dst, legRegD src)
%{
  match(Set dst src);

  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6876
6877 //----------Load/Store/Move Instructions---------------------------------------
6878 //----------Load Instructions--------------------------------------------------
6879
// Load Byte (8 bit signed).
// Matches LoadB from memory and emits one movsbl into the int register.
instruct loadB(rRegI dst, memory mem) %{
  match(Set dst (LoadB mem));
  ins_cost(125);

  format %{ "movsbl $dst, $mem\t# byte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6894
// Load Byte (8 bit signed) into a long register.
// Folds ConvI2L(LoadB mem) into a single movsbq.
instruct loadB2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadB mem)));
  ins_cost(125);

  format %{ "movsbq $dst, $mem\t# byte -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6909
// Load Unsigned Byte (8 bit unsigned).
// Matches LoadUB from memory and emits one movzbl into the int register.
instruct loadUB(rRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));
  ins_cost(125);

  format %{ "movzbl $dst, $mem\t# ubyte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6924
// Load Unsigned Byte (8 bit unsigned) into a long register.
// Folds ConvI2L(LoadUB mem) into a single movzbq.
instruct loadUB2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  ins_cost(125);

  format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6939
// Load Unsigned Byte (8 bit unsigned) with a 32-bit mask into a long
// register. Emits movzbq followed by andl with the mask restricted to its
// low 8 bits; the flags register is killed.
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
    __ andl($dst$$Register, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
6954
// Load Short (16 bit signed).
// Matches LoadS from memory and emits one movswl into the int register.
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));
  ins_cost(125);

  format %{ "movswl $dst, $mem\t# short" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movswl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6969
// Load Short (16 bit signed) to Byte (8 bit signed).
// Folds the left-shift / right-shift pair by 'twentyfour' applied to a
// loaded short into a single movsbl from memory.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
  ins_cost(125);

  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6981
// Load Short (16 bit signed) into a long register.
// Folds ConvI2L(LoadS mem) into a single movswq.
instruct loadS2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadS mem)));
  ins_cost(125);

  format %{ "movswq $dst, $mem\t# short -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movswq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6996
// Load Unsigned Short/Char (16 bit unsigned).
// Matches LoadUS from memory and emits one movzwl into the int register.
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));
  ins_cost(125);

  format %{ "movzwl $dst, $mem\t# ushort/char" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzwl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7011
// Load Unsigned Short/Char (16 bit unsigned) to Byte (8 bit signed).
// Folds the left-shift / right-shift pair by 'twentyfour' applied to a
// loaded ushort into a single movsbl from memory.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
  ins_cost(125);

  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7023
// Load Unsigned Short/Char (16 bit unsigned) into a long register.
// Folds ConvI2L(LoadUS mem) into a single movzwq.
instruct loadUS2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  ins_cost(125);

  format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzwq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7038
// Load Unsigned Short/Char (16 bit unsigned) with mask 0xFF into a long
// register. The load-and-mask is replaced by a single movzbq.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7049
// Load Unsigned Short/Char (16 bit unsigned) with a 32-bit mask into a
// long register. Emits movzwq followed by andl with the mask restricted
// to its low 16 bits; the flags register is killed.
instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 16)" %}
  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
    __ andl($dst$$Register, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
7064
// Load Integer.
// Matches LoadI from memory and emits one movl into the int register.
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));
  ins_cost(125);

  format %{ "movl $dst, $mem\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7079
// Load Integer (32 bit signed) to Byte (8 bit signed).
// Folds the left-shift / right-shift pair by 'twentyfour' applied to a
// loaded int into a single movsbl from memory.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
  ins_cost(125);

  format %{ "movsbl $dst, $mem\t# int -> byte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7091
// Load Integer (32 bit signed) to Unsigned Byte (8 bit unsigned).
// Folds AndI(LoadI mem, mask), with mask of type immI_255, into a single
// movzbl from memory.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask)
%{
  match(Set dst (AndI (LoadI mem) mask));
  ins_cost(125);

  format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7103
// Load Integer (32 bit signed) to Short (16 bit signed).
// Folds the left-shift / right-shift pair by 'sixteen' applied to a
// loaded int into a single movswl from memory.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
  ins_cost(125);

  format %{ "movswl $dst, $mem\t# int -> short" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movswl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7115
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit unsigned).
// Folds AndI(LoadI mem, mask), with mask of type immI_65535, into a single
// movzwl from memory.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask)
%{
  match(Set dst (AndI (LoadI mem) mask));
  ins_cost(125);

  format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzwl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7127
// Load Integer into a long register.
// Folds ConvI2L(LoadI mem) into a single movslq.
instruct loadI2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  ins_cost(125);

  format %{ "movslq $dst, $mem\t# int -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movslq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7142
// Load Integer with mask 0xFF into a long register.
// The load-and-mask is replaced by a single movzbq.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7153
// Load Integer with mask 0xFFFF into a long register.
// The load-and-mask is replaced by a single movzwq.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzwq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7164
// Load Integer with a 31-bit mask into a long register.
// Emits movl followed by andl with the mask constant; the flags register
// is killed.
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
            "andl $dst, $mask" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ andl($dst$$Register, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
7179
// Load Unsigned Integer into a long register.
// Folds AndL(ConvI2L(LoadI mem), mask), with mask of type immL_32bits,
// into a single movl.
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  ins_cost(125);

  format %{ "movl $dst, $mem\t# uint -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7194
// Load Long.
// Matches LoadL from memory and emits one movq into the long register.
instruct loadL(rRegL dst, memory mem) %{
  match(Set dst (LoadL mem));
  ins_cost(125);

  format %{ "movq $dst, $mem\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7209
// Load Range.
// Matches LoadRange from memory and emits one movl into the int register.
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));
  ins_cost(125); // XXX

  format %{ "movl $dst, $mem\t# range" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7222
// Load Pointer.
// Only selected when the load node carries no barrier data
// (barrier_data() == 0); emits one movq into the pointer register.
instruct loadP(rRegP dst, memory mem) %{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadP mem));
  ins_cost(125); // XXX

  format %{ "movq $dst, $mem\t# ptr" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7236
// Load Compressed Pointer.
// Only selected when the load node carries no barrier data
// (barrier_data() == 0); emits one movl into the narrow-oop register.
instruct loadN(rRegN dst, memory mem) %{
  match(Set dst (LoadN mem));
  predicate(n->as_Load()->barrier_data() == 0);
  ins_cost(125); // XXX

  format %{ "movl $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7250
7251
// Load Klass Pointer.
// Matches LoadKlass from memory and emits one movq into the pointer
// register.
instruct loadKlass(rRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));
  ins_cost(125); // XXX

  format %{ "movq $dst, $mem\t# class" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7264
// Load narrow Klass Pointer.
// Used only when UseCompactObjectHeaders is off; emits one movl into the
// narrow register.
instruct loadNKlass(rRegN dst, memory mem) %{
  match(Set dst (LoadNKlass mem));
  predicate(!UseCompactObjectHeaders);
  ins_cost(125); // XXX

  format %{ "movl $dst, $mem\t# compressed klass ptr" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7278
// Load narrow Klass Pointer when UseCompactObjectHeaders is on.
// Loads 32 bits from the incoming address rebased by -Type::klass_offset()
// and shifts the value right by markWord::klass_shift. Kills the flags.
instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr) %{
  match(Set dst (LoadNKlass mem));
  predicate(UseCompactObjectHeaders);
  effect(KILL cr);
  ins_cost(125);

  format %{
    "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
    "shrl $dst, markWord::klass_shift"
  %}
  ins_encode %{
    // The incoming address points at obj-start + Type::klass_offset().
    // Step back to obj-start so the load reads the object's mark-word.
    Address mark_addr = ($mem$$Address).plus_disp(-Type::klass_offset());
    Register klass_reg = $dst$$Register;
    if (!UseAPX) {
      __ movl(klass_reg, mark_addr);
      __ shrl(klass_reg, markWord::klass_shift);
    } else {
      // With APX, load and shift are emitted as one eshrl.
      __ eshrl(klass_reg, mark_addr, markWord::klass_shift, false);
    }
  %}
  ins_pipe(ialu_reg_mem);
%}
7303
// Load Float.
// Matches LoadF from memory and emits movflt into the XMM register.
instruct loadF(regF dst, memory mem) %{
  match(Set dst (LoadF mem));
  ins_cost(145); // XXX

  format %{ "movss $dst, $mem\t# float" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movflt(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7316
// Load Double, variant selected when UseXmmLoadAndClearUpper is off.
// Emits movdbl into the XMM register (printed as movlpd).
instruct loadD_partial(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  predicate(!UseXmmLoadAndClearUpper);
  ins_cost(145); // XXX

  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdbl(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7330
// Load Double, variant selected when UseXmmLoadAndClearUpper is on.
// Emits movdbl into the XMM register (printed as movsd).
instruct loadD(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  predicate(UseXmmLoadAndClearUpper);
  ins_cost(145); // XXX

  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdbl(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7343
// Materialize an AOT Runtime Constants address into a pointer register.
// The constant operand is cast to 'address' and passed to
// load_aotrc_address().
instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con) %{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ load_aotrc_address(dst_reg, (address)$con$$constant);
  %}
  ins_pipe(ialu_reg_fat);
%}
7356
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
// AVX10.2 variant for nodes that are not loop reductions. The ideal opcode
// (MaxF or MinF) is forwarded to sminmax_fp_avx10_2 with mask register k0.
instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b) %{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));

  format %{ "minmaxF $dst, $a, $b" %}
  ins_encode %{
    __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_FLOAT, $dst$$XMMRegister, k0,
                          $a$$XMMRegister, $b$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7372
// Float min/max for loop-reduction nodes on AVX10.2 hardware.
// Delegates to emit_fp_min_max with an integer temp register; kills the
// flags register.
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // true selects min, false selects max
    const bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7389
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
// AVX variant (no AVX10.2) for nodes that are not loop reductions. Maps the
// scalar opcode to Op_MinV / Op_MaxV and calls vminmax_fp at 128-bit width
// with three XMM temporaries.
instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);

  format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    int vector_opcode;
    if (this->ideal_Opcode() == Op_MinF) {
      vector_opcode = Op_MinV;
    } else {
      vector_opcode = Op_MaxV;
    }
    __ vminmax_fp(vector_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
                  $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
7408
// Float min/max for loop-reduction nodes on AVX hardware without AVX10.2.
// Delegates to emit_fp_min_max with an integer temp register; kills the
// flags register.
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
  ins_encode %{
    // true selects min, false selects max
    const bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7425
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
// AVX10.2 variant for nodes that are not loop reductions. The ideal opcode
// (MaxD or MinD) is forwarded to sminmax_fp_avx10_2 with mask register k0.
instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b) %{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));

  format %{ "minmaxD $dst, $a, $b" %}
  ins_encode %{
    __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_DOUBLE, $dst$$XMMRegister, k0,
                          $a$$XMMRegister, $b$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7441
// Double min/max for loop-reduction nodes on AVX10.2 hardware.
// Delegates to emit_fp_min_max with a general-purpose temp register; kills
// the flags register.
// NOTE(review): rtmp is declared rRegI here, while minmaxD_reduction_reg
// (the non-AVX10.2 double variant) declares it rRegL -- confirm which
// register class emit_fp_min_max needs for fp_prec_dbl.
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // true selects min, false selects max
    const bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7458
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
// AVX variant (no AVX10.2) for nodes that are not loop reductions. Maps the
// scalar opcode to Op_MinV / Op_MaxV and calls vminmax_fp at 128-bit width
// with three XMM temporaries.
instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);

  format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    int vector_opcode;
    if (this->ideal_Opcode() == Op_MinD) {
      vector_opcode = Op_MinV;
    } else {
      vector_opcode = Op_MaxV;
    }
    __ vminmax_fp(vector_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
                  $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
7477
// Double min/max for loop-reduction nodes on AVX hardware without AVX10.2.
// Delegates to emit_fp_min_max with a long temp register; kills the flags
// register.
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // true selects min, false selects max
    const bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7494
// Load Effective Address for an indOffset8 address operand.
// Emits one leaq of the operand's address into the pointer register.
instruct leaP8(rRegP dst, indOffset8 mem) %{
  match(Set dst mem);
  ins_cost(110); // XXX

  format %{ "leaq $dst, $mem\t# ptr 8" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7507
// Load Effective Address for an indOffset32 address operand.
// Emits one leaq of the operand's address into the pointer register.
instruct leaP32(rRegP dst, indOffset32 mem) %{
  match(Set dst mem);
  ins_cost(110);

  format %{ "leaq $dst, $mem\t# ptr 32" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7519
// Load Effective Address for an indIndexOffset address operand.
// Emits one leaq of the operand's address into the pointer register.
instruct leaPIdxOff(rRegP dst, indIndexOffset mem) %{
  match(Set dst mem);
  ins_cost(110);

  format %{ "leaq $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7531
// Load Effective Address for an indIndexScale address operand.
// Emits one leaq of the operand's address into the pointer register.
instruct leaPIdxScale(rRegP dst, indIndexScale mem) %{
  match(Set dst mem);
  ins_cost(110);

  format %{ "leaq $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7543
// Produces the pointer described by an indPosIndexScale operand with a single leaq.
// The disassembly tag is "posidxscale" so that this rule can be told apart from
// leaPIdxScale in debug output (the other PosIdx rules already use a "pos" prefix).
instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7555
// Produces the pointer described by an indIndexScaleOffset operand with a single leaq.
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7567
// Produces the pointer described by an indPosIndexOffset operand with a single leaq.
instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7579
// Produces the pointer described by an indPosIndexScaleOffset operand with a single leaq.
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7591
7592 // Load Effective Address which uses Narrow (32-bits) oop
// leaq over an indCompressedOopOffset operand. Only selected when compressed oops
// are enabled and the compressed-oop shift is non-zero.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7605
// leaq over an indOffset8Narrow operand; selected only when the compressed-oop
// shift is zero.
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7618
// leaq over an indOffset32Narrow operand; selected only when the compressed-oop
// shift is zero.
instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7631
// leaq over an indIndexOffsetNarrow operand; selected only when the compressed-oop
// shift is zero.
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7644
// leaq over an indIndexScaleNarrow operand; selected only when the compressed-oop
// shift is zero.
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7657
// leaq over an indIndexScaleOffsetNarrow operand; selected only when the
// compressed-oop shift is zero.
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7670
// leaq over an indPosIndexOffsetNarrow operand; selected only when the
// compressed-oop shift is zero.
instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7683
// leaq over an indPosIndexScaleOffsetNarrow operand; selected only when the
// compressed-oop shift is zero.
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7696
// Loads an arbitrary int constant into a register with movl.
instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7707
// Loads int zero by xor-ing the destination with itself. Cheaper (cost 50) than
// the generic constant load; the flags register is declared killed.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorl(dst_reg, dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
7720
// Loads an arbitrary long constant through mov64. Most expensive of the long
// constant rules (cost 150).
instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ mov64(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7732
// Loads long zero with a 32-bit xor of the destination with itself (a 32-bit
// write on x86-64 zero-extends into the upper half). Flags are declared killed.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorl(dst_reg, dst_reg);
  %}
  ins_pipe(ialu_reg); // XXX
%}
7745
// Loads a long constant matched by immUL32 using a 32-bit movl (cost 60).
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7757
// Loads a long constant matched by immL32 using movq with an immediate (cost 70).
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7769
// Loads a pointer constant through mov64, passing along the constant's relocation
// type with the RELOC_IMM64 format.
instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq $dst, $con\t# ptr" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ mov64(dst_reg, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7779
// Loads a null pointer by xor-ing the destination with itself (32-bit form).
// Flags are declared killed.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# ptr" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorl(dst_reg, dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
7792
// Loads a pointer constant matched by immP31 using a 32-bit movl (cost 60).
// NOTE(review): movl with an immediate does not write EFLAGS, so the KILL cr
// effect looks conservative -- confirm before relaxing it.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7805
// Loads a float constant from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movflt(dst_xmm, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7815
// Loads an immH (half-float) constant from the constant table into an XMM
// register, using the same movflt path as loadConF.
instruct loadConH(regF dst, immH con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movflt(dst_xmm, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7825
// Loads a compressed null pointer by xor-ing the destination with itself.
// Flags are declared killed. The format shows both xorq operands as $dst,
// matching what the encoding emits.
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7835
// Loads a non-null compressed oop constant via set_narrow_oop. A null constant
// is not expected to reach this rule and trips ShouldNotReachHere().
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      Register dst_reg = $dst$$Register;
      __ set_narrow_oop(dst_reg, (jobject)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7851
// Loads a non-null compressed klass constant via set_narrow_klass. A null
// constant is not expected to reach this rule and trips ShouldNotReachHere().
instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address klass_con = (address)$src$$constant;
    if (klass_con != nullptr) {
      Register dst_reg = $dst$$Register;
      __ set_narrow_klass(dst_reg, (Klass*)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7867
// Loads float 0.0 by xor-ing the XMM destination with itself; cheaper (cost 100)
// than the constant-table load.
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps $dst, $dst\t# float 0.0" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ xorps(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow);
%}
7879
7880 // Use the same format since predicate() can not be used here.
// Loads a double constant from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdbl(dst_xmm, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7890
// Loads double 0.0 by xor-ing the XMM destination with itself; cheaper (cost 100)
// than the constant-table load.
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd $dst, $dst\t# double 0.0" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ xorpd(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow);
%}
7902
// Reloads an int from a stack slot into a general register.
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7914
// Reloads a long from a stack slot into a general register.
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7926
// Reloads a pointer from a stack slot into a general register.
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7938
// Reloads a float from a stack slot into an XMM register. The slot is addressed
// as rsp plus the operand's displacement.
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movflt(dst_xmm, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
7950
7951 // Use the same format since predicate() can not be used here.
// Reloads a double from a stack slot into an XMM register. The slot is addressed
// as rsp plus the operand's displacement.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdbl(dst_xmm, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
7963
7964 // Prefetch instructions for allocation.
7965 // Must be safe to execute with invalid address (cannot fault).
7966
// Allocation prefetch using prefetchw; chosen when AllocatePrefetchInstr == 3.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    Address target = $mem$$Address;
    __ prefetchw(target);
  %}
  ins_pipe(ialu_mem);
%}
7978
// Allocation prefetch using prefetchnta; chosen when AllocatePrefetchInstr == 0.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    Address target = $mem$$Address;
    __ prefetchnta(target);
  %}
  ins_pipe(ialu_mem);
%}
7990
// Allocation prefetch using prefetcht0; chosen when AllocatePrefetchInstr == 1.
instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    Address target = $mem$$Address;
    __ prefetcht0(target);
  %}
  ins_pipe(ialu_mem);
%}
8002
// Allocation prefetch using prefetcht2; chosen when AllocatePrefetchInstr == 2.
instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    Address target = $mem$$Address;
    __ prefetcht2(target);
  %}
  ins_pipe(ialu_mem);
%}
8014
8015 //----------Store Instructions-------------------------------------------------
8016
8017 // Store Byte
// Writes the low byte of an int register to memory (StoreB).
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movb($mem$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8029
8030 // Store Char/Short
// Writes the low 16 bits of an int register to memory (StoreC).
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw $mem, $src\t# char/short" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movw($mem$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8042
8043 // Store Integer
// Writes a 32-bit int register to memory (StoreI).
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movl($mem$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8055
8056 // Store Long
// Writes a 64-bit long register to memory (StoreL).
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movq($mem$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}
8068
8069 // Store Pointer
// Writes a pointer register to memory (StoreP). Only applies to stores whose
// barrier_data() is zero.
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movq($mem$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8082
// Stores a null pointer by writing r12. Selected only with compressed oops, a
// null heap base, and zero barrier data; per the format text r12 (heap base)
// is zero in that configuration.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    Register zero_reg = r12;
    __ movq($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8095
8096 // Store Null Pointer, mark word, or other simple pointer constant.
// Stores a pointer constant matched by immP31 straight from an immediate.
// Only applies to stores whose barrier_data() is zero.
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8109
8110 // Store Compressed Pointer
// Writes a compressed oop register to memory with a 32-bit store (StoreN).
// Only applies to stores whose barrier_data() is zero.
instruct storeN(memory mem, rRegN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movl($mem$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8123
// Writes a compressed klass pointer register to memory with a 32-bit store.
instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movl($mem$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8135
// Stores a compressed null by writing the low 32 bits of r12. Selected only when
// the heap base is null and barrier data is zero; per the format text r12
// (heap base) is zero in that configuration.
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    Register zero_reg = r12;
    __ movl($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8148
// Stores a compressed oop constant. A non-null constant goes through
// set_narrow_oop; a null constant is written as a plain 32-bit zero.
// Only applies to stores whose barrier_data() is zero.
instruct storeImmN(memory mem, immN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    } else {
      __ movl($mem$$Address, 0);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}
8166
// Stores a compressed klass constant to memory via set_narrow_klass.
instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ set_narrow_klass(dest, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8178
8179 // Store Integer Immediate
// Stores int zero by writing the low 32 bits of r12. Selected only with
// compressed oops and a null heap base; per the format text r12 is zero then.
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    Register zero_reg = r12;
    __ movl($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8192
// Stores an int constant to memory directly from an immediate.
instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movl(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8204
8205 // Store Long Immediate
// Stores long zero by writing r12. Selected only with compressed oops and a
// null heap base; per the format text r12 is zero then.
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    Register zero_reg = r12;
    __ movq($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8218
// Stores a long constant matched by immL32 to memory directly from an immediate.
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8230
8231 // Store Short/Char Immediate
// Stores a 16-bit zero by writing the low word of r12. Selected only with
// compressed oops and a null heap base; per the format text r12 is zero then.
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    Register zero_reg = r12;
    __ movw($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8244
// Stores a 16-bit constant to memory directly from an immediate (StoreC).
// Enabled only when UseStoreImmI16 is set.
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw $mem, $src\t# short/char" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movw(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8257
8258 // Store Byte Immediate
// Stores a zero byte by writing the low byte of r12. Selected only with
// compressed oops and a null heap base; per the format text r12 is zero then.
// The format comment says "byte" because this rule matches StoreB and emits movb.
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8271
// Stores a byte constant matched by immI8 to memory directly from an immediate.
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movb(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8283
8284 // Store Float
// Writes a float held in an XMM register to memory (StoreF).
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss $mem, $src\t# float" %}
  ins_encode %{
    XMMRegister src_xmm = $src$$XMMRegister;
    __ movflt($mem$$Address, src_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8296
8297 // Store immediate Float value (it is faster than store from XMM register)
// Stores float 0.0 as a 32-bit write of r12, avoiding an XMM source. Selected
// only with compressed oops and a null heap base; per the format text r12 is
// zero then. Lowest cost (25) of the float-store rules here.
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    Register zero_reg = r12;
    __ movl($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8310
// Stores a float constant as a 32-bit integer immediate: the constant is passed
// through jint_cast and written with movl, so no XMM register is needed.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl $mem, $src\t# float" %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movl(dest, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
8322
8323 // Store Double
// Writes a double held in an XMM register to memory (StoreD).
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd $mem, $src\t# double" %}
  ins_encode %{
    XMMRegister src_xmm = $src$$XMMRegister;
    __ movdbl($mem$$Address, src_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8335
8336 // Store immediate double 0.0 (it is faster than store from XMM register)
// Stores double 0.0 as a 64-bit immediate write. Used when compressed oops are
// off or the heap base is non-null, i.e. the complement of storeD0's predicate.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq $mem, $src\t# double 0." %}
  ins_encode %{
    Address dest = $mem$$Address;
    __ movq(dest, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8349
// Stores double 0.0 as a 64-bit write of r12. Selected only with compressed
// oops and a null heap base; per the format text r12 is zero then.
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    Register zero_reg = r12;
    __ movq($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8362
// Spills an int register to a stack slot.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movl($dst$$Address, src_reg);
  %}
  ins_pipe( ialu_mem_reg );
%}
8374
// Spills a long register to a stack slot.
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movq($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8386
// Spills a pointer register to a stack slot.
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ movq($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8398
// Spills a float XMM register to a stack slot addressed as rsp plus the
// operand's displacement.
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    XMMRegister src_xmm = $src$$XMMRegister;
    __ movflt(Address(rsp, $dst$$disp), src_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8410
// Spills a double XMM register to a stack slot addressed as rsp plus the
// operand's displacement.
instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    XMMRegister src_xmm = $src$$XMMRegister;
    __ movdbl(Address(rsp, $dst$$disp), src_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8422
// Cache-line write-back of the line at $addr. Available only when the CPU
// supports data cache line flush. The operand must be a bare base register:
// no index and a zero displacement, which the asserts check.
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    Register line_base = $addr$$base$$Register;
    __ cache_wb(Address(line_base, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}
8437
// Synchronization emitted before a sequence of cache write-backs; calls
// cache_wbsync with its flag set to true.
instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    const bool is_pre = true;
    __ cache_wbsync(is_pre);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8450
// Synchronization emitted after a sequence of cache write-backs; calls
// cache_wbsync with its flag set to false.
instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    const bool is_pre = false;
    __ cache_wbsync(is_pre);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8463
8464 //----------BSWAP Instructions-------------------------------------------------
// In-place byte swap of a 32-bit int (ReverseBytesI) using bswapl.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl $dst" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ bswapl(value);
  %}
  ins_pipe( ialu_reg );
%}
8474
// In-place byte swap of a 64-bit long (ReverseBytesL) using bswapq.
instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq $dst" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ bswapq(value);
  %}
  ins_pipe( ialu_reg);
%}
8484
// In-place byte swap of an unsigned 16-bit value (ReverseBytesUS): swap all four
// bytes, then shift right logically by 16 so the result is zero-extended.
instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "shrl $dst,16\n\t" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ bswapl(value);
    __ shrl(value, 16);
  %}
  ins_pipe( ialu_reg );
%}
8497
// In-place byte swap of a signed 16-bit value (ReverseBytesS): swap all four
// bytes, then shift right arithmetically by 16 so the result is sign-extended.
// The format names the shift "sarl", matching the emitted instruction.
instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "sarl $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
8510
8511 //---------- Zeros Count Instructions ------------------------------------------
8512
// Leading-zero count of an int register using lzcntl; enabled by
// UseCountLeadingZerosInstruction.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ lzcntl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
8524
// Leading-zero count of an int loaded from memory; the LoadI is folded into
// lzcntl's memory operand. Enabled by UseCountLeadingZerosInstruction.
instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ lzcntl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8536
// Leading-zero count of an int without lzcnt. bsrl yields the index of the
// highest set bit; when the source is zero (ZF set) the index is forced to -1.
// The result is then computed as 31 - index via negl followed by addl.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
      "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 31" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    Label have_index;
    __ bsrl(result, input);
    __ jccb(Assembler::notZero, have_index);
    // Source was zero: substitute -1 so the arithmetic below yields 32.
    __ movl(result, -1);
    __ bind(have_index);
    __ negl(result);
    __ addl(result, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
8561
// Leading-zero count of a long register using lzcntq; the result is an int.
// Enabled by UseCountLeadingZerosInstruction.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ lzcntq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
8573
// Leading-zero count of a long loaded from memory; the LoadL is folded into
// lzcntq's memory operand. Enabled by UseCountLeadingZerosInstruction.
instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ lzcntq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8585
// Leading-zero count of a long without lzcnt. bsrq yields the index of the
// highest set bit; when the source is zero (ZF set) the index is forced to -1.
// The result is then computed as 63 - index via negl followed by addl.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
      "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 63" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    Label have_index;
    __ bsrq(result, input);
    __ jccb(Assembler::notZero, have_index);
    // Source was zero: substitute -1 so the arithmetic below yields 64.
    __ movl(result, -1);
    __ bind(have_index);
    __ negl(result);
    __ addl(result, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
8610
// Trailing-zero count of an int register using tzcntl; enabled by
// UseCountTrailingZerosInstruction.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ tzcntl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
8622
// Trailing-zero count of an int loaded from memory; the LoadI is folded into
// tzcntl's memory operand. Enabled by UseCountTrailingZerosInstruction.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ tzcntl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8634
// Trailing-zero count of an int without tzcnt. bsfl yields the index of the
// lowest set bit; when the source is zero (ZF set) the result is set to 32.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz done\n\t"
            "movl $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    Label found_bit;
    __ bsfl(result, input);
    __ jccb(Assembler::notZero, found_bit);
    // Source was zero: every bit counts as a trailing zero.
    __ movl(result, BitsPerInt);
    __ bind(found_bit);
  %}
  ins_pipe(ialu_reg);
%}
8654
// Trailing-zero count of a long register using tzcntq; the result is an int.
// Enabled by UseCountTrailingZerosInstruction.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ tzcntq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
8666
// Trailing-zero count of a long loaded from memory; the LoadL is folded into
// tzcntq's memory operand. Enabled by UseCountTrailingZerosInstruction.
instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ tzcntq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8678
// Trailing-zero count of a long without tzcnt. bsfq yields the index of the
// lowest set bit; when the source is zero (ZF set) the result is set to 64.
instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz done\n\t"
            "movl $dst, 64\n"
      "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    Label found_bit;
    __ bsfq(result, input);
    __ jccb(Assembler::notZero, found_bit);
    // Source was zero: every bit counts as a trailing zero.
    __ movl(result, BitsPerLong);
    __ bind(found_bit);
  %}
  ins_pipe(ialu_reg);
%}
8698
8699 //--------------- Reverse Operation Instructions ----------------
// Bit reversal of an int (ReverseI) on CPUs without GFNI. Calls reverseI with
// no XMM temporaries (xnoreg) and one general-purpose temp.
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    Register result  = $dst$$Register;
    Register input   = $src$$Register;
    Register scratch = $rtmp$$Register;
    __ reverseI(result, input, xnoreg, xnoreg, scratch);
  %}
  ins_pipe( ialu_reg );
%}
8710
// Bit reversal of an int (ReverseI) on CPUs with GFNI. Calls reverseI with two
// XMM temporaries and one general-purpose temp.
instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    Register    result   = $dst$$Register;
    Register    input    = $src$$Register;
    XMMRegister vec_tmp1 = $xtmp1$$XMMRegister;
    XMMRegister vec_tmp2 = $xtmp2$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    __ reverseI(result, input, vec_tmp1, vec_tmp2, scratch);
  %}
  ins_pipe( ialu_reg );
%}
8721
// Bit reversal of a long (ReverseL) on CPUs without GFNI. Calls reverseL with
// no XMM temporaries (xnoreg) and two general-purpose temps.
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    Register result   = $dst$$Register;
    Register input    = $src$$Register;
    Register scratch1 = $rtmp1$$Register;
    Register scratch2 = $rtmp2$$Register;
    __ reverseL(result, input, xnoreg, xnoreg, scratch1, scratch2);
  %}
  ins_pipe( ialu_reg );
%}
8732
// ReverseL when GFNI is supported: reverseL receives two XMM temporaries and
// one general-purpose scratch register; the last argument is noreg.
instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  match(Set dst (ReverseL src));
  predicate(VM_Version::supports_gfni());
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const Register    result  = $dst$$Register;
    const Register    value   = $src$$Register;
    const XMMRegister vec_a   = $xtmp1$$XMMRegister;
    const XMMRegister vec_b   = $xtmp2$$XMMRegister;
    const Register    scratch = $rtmp$$Register;
    __ reverseL(result, value, vec_a, vec_b, scratch, noreg);
  %}
  ins_pipe(ialu_reg);
%}
8743
8744 //---------- Population Count Instructions -------------------------------------
8745
// PopCountI on a register operand; enabled by UsePopCountInstruction.
instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (PopCountI src));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ popcntl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
8757
// PopCountI with the LoadI folded in: popcntl reads its operand from $mem.
instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  match(Set dst (PopCountI (LoadI mem)));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  source = $mem$$Address;
    __ popcntl(result, source);
  %}
  ins_pipe(ialu_reg);
%}
8769
// PopCountL on a register operand. $dst is rRegI because Long.bitCount(long)
// returns an int.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (PopCountL src));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ popcntq(result, value);
  %}
  ins_pipe(ialu_reg);
%}
8782
// PopCountL with the LoadL folded in. $dst is rRegI because
// Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  match(Set dst (PopCountL (LoadL mem)));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  source = $mem$$Address;
    __ popcntq(result, source);
  %}
  ins_pipe(ialu_reg);
%}
8795
8796
8797 //----------MemBar Instructions-----------------------------------------------
8798 // Memory barrier flavors
8799
// MemBarAcquire and LoadFence: zero cost, zero size, nothing is emitted.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8811
// MemBarAcquireLock: nothing is emitted. The format string attributes this
// to the CMPXCHG already issued by FastLock.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8822
// MemBarRelease and StoreFence: zero cost, zero size, nothing is emitted.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8834
// MemBarReleaseLock: nothing is emitted. The format string attributes this
// to the FastUnlock that follows.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8845
// MemBarStoreLoad: emits membar(Assembler::StoreLoad), printed as a locked
// addl on [rsp]. The flags register is declared killed.
instruct membar_storeload(rFlagsReg cr) %{
  match(MemBarStoreLoad);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8860
// MemBarVolatile: emits membar(Assembler::StoreLoad), printed as a locked
// addl on [rsp]. The flags register is declared killed.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8875
// MemBarVolatile for which Matcher::post_store_load_barrier(n) holds:
// zero cost, zero size, nothing is emitted.
instruct unnecessary_membar_volatile()
%{
  predicate(Matcher::post_store_load_barrier(n));
  match(MemBarVolatile);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8887
// MemBarFull: emits membar(Assembler::StoreLoad), printed as a locked addl
// on [rsp]. The flags register is declared killed.
instruct membar_full(rFlagsReg cr) %{
  match(MemBarFull);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8902
// MemBarStoreStore and StoreStoreFence: zero cost, zero size, nothing is
// emitted.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8913
8914 //----------Move Instructions--------------------------------------------------
8915
// CastX2P (long -> pointer): a movptr that is omitted when $dst and $src
// have the same register encoding.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    const bool same_register = ($dst$$reg == $src$$reg);
    if (!same_register) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8928
// CastP2X (pointer -> long): a movptr that is omitted when $dst and $src
// have the same register encoding.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    const bool same_register = ($dst$$reg == $src$$reg);
    if (!same_register) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8941
// ConvL2I of CastP2X collapsed into one movl (oop -> int). The original note
// gives vector alignment masking as the use case.
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{
    const Register result  = $dst$$Register;
    const Register pointer = $src$$Register;
    __ movl(result, pointer);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8953
// ConvL2I(CastP2X(DecodeN src)) collapsed into one movl of the compressed
// oop. Only matched when CompressedOops::shift() is 0, which the original
// note describes as 32-bit oops (heap < 4Gb); the use case given there is
// vector alignment masking.
instruct convN2I(rRegI dst, rRegN src)
%{
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
  predicate(CompressedOops::shift() == 0);

  format %{ "movl $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register narrow = $src$$Register;
    __ movl(result, narrow);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8967
// EncodeP (oop pointer -> compressed form) for results whose pointer type is
// not TypePtr::NotNull.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    const Register oop    = $src$$Register;
    const Register narrow = $dst$$Register;
    // encode_heap_oop is invoked on $dst alone, so copy the oop there first.
    if (oop != narrow) {
      __ movq(narrow, oop);
    }
    __ encode_heap_oop(narrow);
  %}
  ins_pipe(ialu_reg_long);
%}
8984
// EncodeP for results whose pointer type is TypePtr::NotNull; uses the
// two-register encode_heap_oop_not_null.
instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    const Register narrow = $dst$$Register;
    const Register oop    = $src$$Register;
    __ encode_heap_oop_not_null(narrow, oop);
  %}
  ins_pipe(ialu_reg_long);
%}
8995
// DecodeN for results whose pointer type is neither TypePtr::NotNull nor
// TypePtr::Constant.
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    const Register narrow = $src$$Register;
    const Register oop    = $dst$$Register;
    // decode_heap_oop is invoked on $dst alone, so copy the narrow oop there
    // first.
    if (narrow != oop) {
      __ movq(oop, narrow);
    }
    __ decode_heap_oop(oop);
  %}
  ins_pipe(ialu_reg_long);
%}
9012
// DecodeN for results whose pointer type is TypePtr::NotNull or
// TypePtr::Constant.
instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  effect(KILL cr);
  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    const Register narrow = $src$$Register;
    const Register oop    = $dst$$Register;
    // Pick the one-register overload when source and destination coincide,
    // the two-register overload otherwise.
    if (narrow == oop) {
      __ decode_heap_oop_not_null(oop);
    } else {
      __ decode_heap_oop_not_null(oop, narrow);
    }
  %}
  ins_pipe(ialu_reg_long);
%}
9030
// EncodePKlass via encode_and_move_klass_not_null; $dst is declared TEMP and
// the flags are killed.
instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    const Register narrow_klass = $dst$$Register;
    const Register klass        = $src$$Register;
    __ encode_and_move_klass_not_null(narrow_klass, klass);
  %}
  ins_pipe(ialu_reg_long);
%}
9040
// DecodeNKlass via decode_and_move_klass_not_null; $dst is declared TEMP and
// the flags are killed.
instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    const Register klass        = $dst$$Register;
    const Register narrow_klass = $src$$Register;
    __ decode_and_move_klass_not_null(klass, narrow_klass);
  %}
  ins_pipe(ialu_reg_long);
%}
9050
9051 //----------Conditional Move---------------------------------------------------
9052 // Jump
// Jump on (switch_val << shift). Disabled by predicate(false); the original
// note calls it a dummy instruction for generating temp registers.
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  predicate(false);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is avoided: per the original note the macro
    // assembler would need r10 for it, and the compiler may allocate r10.
    // The table base is loaded into $dest and the indexed jump is built by
    // hand instead of:
    //   Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
    //   ArrayAddress dispatch(table, index);
    const Register table_base = $dest$$Register;
    const Register index      = $switch_val$$Register;
    const Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Address dispatch(table_base, index, scale);
    __ lea(table_base, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
9074
// Jump on ((switch_val << shift) + offset): the constant table address is
// loaded into $dest, then an indirect jmp goes through
// [$dest + $switch_val << $shift + $offset].
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is avoided: per the original note the macro
    // assembler would need r10 for it, and the compiler may allocate r10.
    // The table base is loaded into $dest and the indexed jump is built by
    // hand instead of:
    //   Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    //   ArrayAddress dispatch(table, index);
    const Register table_base = $dest$$Register;
    const Register index      = $switch_val$$Register;
    const Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    const int displacement = static_cast<int>($offset$$constant);
    Address dispatch(table_base, index, scale, displacement);
    __ lea(table_base, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
9094
// Jump on switch_val: the constant table address is loaded into $dest, then
// an indirect jmp goes through [$dest + $switch_val] (scale times_1).
instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is avoided: per the original note the macro
    // assembler would need r10 for it, and the compiler may allocate r10.
    // The table base is loaded into $dest and the indexed jump is built by
    // hand instead of:
    //   Address index(noreg, switch_reg, Address::times_1);
    //   ArrayAddress dispatch(table, index);
    const Register table_base = $dest$$Register;
    const Register index      = $switch_val$$Register;
    Address dispatch(table_base, index, Address::times_1);
    __ lea(table_base, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
9114
// CMoveI (rFlagsReg/cmpOp) with the immI_1 operand $src, restricted by the
// predicate to nodes whose in(2)->in(2) is the constant 0. Emitted as one
// setb on the negated condition rather than a cmov.
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition negated =
      MacroAssembler::negate_condition(static_cast<Assembler::Condition>($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9129
// CMoveI (rFlagsReg/cmpOp), register source, two-operand cmovl; selected
// when UseAPX is off.
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9142
// CMoveI (rFlagsReg/cmpOp) using the three-operand ecmovl form (tagged "ndd"
// in the format string); selected when UseAPX is on.
instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9155
// rFlagsRegU/cmpOpU counterpart of the CMoveI setb rule: immI_1 operand
// $src, predicate requires in(2)->in(2) to be the constant 0, emitted as one
// setb on the negated condition.
instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition negated =
      MacroAssembler::negate_condition(static_cast<Assembler::Condition>($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9169
// CMoveI (rFlagsRegU/cmpOpU), register source, two-operand cmovl; selected
// when UseAPX is off.
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9181
// CMoveI (rFlagsRegU/cmpOpU) using the three-operand ecmovl form; selected
// when UseAPX is on.
instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9193
// rFlagsRegUCF/cmpOpUCF counterpart of the CMoveI setb rule: immI_1 operand
// $src, predicate requires in(2)->in(2) to be the constant 0, emitted as one
// setb on the negated condition.
instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition negated =
      MacroAssembler::negate_condition(static_cast<Assembler::Condition>($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9207
// rFlagsRegUCFE/cmpOpUCFE counterpart of the CMoveI setb rule: immI_1
// operand $src, predicate requires in(2)->in(2) to be the constant 0,
// emitted as one setb on the negated condition.
instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
  ins_encode %{
    const Assembler::Condition negated =
      MacroAssembler::negate_condition(static_cast<Assembler::Condition>($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9221
// CMoveI on rFlagsRegUCF/cmpOpUCF: has no encoding of its own and expands to
// cmovI_regU with the same operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  ins_cost(200);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
9230
// CMoveI on rFlagsRegUCFE/cmpOpUCFE using the three-operand ecmovl form.
// Carries no predicate of its own.
instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9241
// CMoveI on cmpOpUCF2 when the Bool test is BoolTest::ne: the move is done
// by two cmovl instructions, one on parity and one on notEqual.
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9255
// BoolTest::eq variant. Because (x == y) == !(x != y), the sense of the test
// is flipped by swapping the CMove inputs in the match rule (src dst instead
// of dst src); the emitted parity/notEqual cmovl pair is the same as for ne.
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(TEMP dst);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9272
// CMoveI (rFlagsReg/cmpOp) with the LoadI folded in: cmovl takes its source
// from memory.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9284
// CMoveI (rFlagsRegU/cmpOpU) with the LoadI folded in: cmovl takes its
// source from memory.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9297
// CMoveI from memory on rFlagsRegUCF/cmpOpUCF: has no encoding of its own
// and expands to cmovI_memU with the same operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  ins_cost(250);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
9306
// CMoveI from memory on rFlagsRegUCFE/cmpOpUCFE: cmovl with an Address
// source.
instruct cmovI_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9317
// CMoveN (compressed pointer) on rFlagsReg/cmpOp, two-operand cmovl;
// selected when UseAPX is off.
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9331
// CMoveN on rFlagsReg/cmpOp using the three-operand ecmovl form; selected
// when UseAPX is on.
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9345
// CMoveN on rFlagsRegU/cmpOpU, two-operand cmovl; selected when UseAPX is
// off.
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9359
// CMoveN on rFlagsRegUCF/cmpOpUCF: has no encoding of its own and expands to
// cmovN_regU with the same operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  ins_cost(200);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
9368
// CMoveN on rFlagsRegU/cmpOpU using the three-operand ecmovl form; selected
// when UseAPX is on.
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9382
// CMoveN on rFlagsRegUCFE/cmpOpUCFE using the three-operand ecmovl form.
// Carries no predicate of its own.
instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9393
// CMoveN on cmpOpUCF2 when the Bool test is BoolTest::ne: the move is done
// by two cmovl instructions, one on parity and one on notEqual.
instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9407
// BoolTest::eq variant. Because (x == y) == !(x != y), the sense of the test
// is flipped by swapping the CMove inputs in the match rule (src dst instead
// of dst src); the emitted parity/notEqual cmovl pair is the same as for ne.
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9423
// CMoveP (pointer) on rFlagsReg/cmpOp, two-operand cmovq; selected when
// UseAPX is off.
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9437
// CMoveP on rFlagsReg/cmpOp using the three-operand ecmovq form; selected
// when UseAPX is on.
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9451
// CMoveP on rFlagsRegU/cmpOpU, two-operand cmovq; selected when UseAPX is
// off.
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9465
// CMoveP on rFlagsRegU/cmpOpU using the three-operand ecmovq form; selected
// when UseAPX is on.
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9479
// CMoveP on rFlagsRegUCF/cmpOpUCF: has no encoding of its own and expands to
// cmovP_regU with the same operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  ins_cost(200);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
9488
// CMoveP on rFlagsRegUCFE/cmpOpUCFE using the three-operand ecmovq form.
// Carries no predicate of its own.
instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9499
// CMoveP on cmpOpUCF2 when the Bool test is BoolTest::ne: the move is done
// by two cmovq instructions, one on parity and one on notEqual.
instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9513
// BoolTest::eq variant. Because (x == y) == !(x != y), the sense of the test
// is flipped by swapping the CMove inputs in the match rule (src dst instead
// of dst src); the emitted parity/notEqual cmovq pair is the same as for ne.
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9529
// CMoveL (rFlagsReg/cmpOp) with the immL1 operand $src, restricted by the
// predicate to nodes whose in(2)->in(2) is the long constant 0. Emitted as
// one setb on the negated condition rather than a cmov.
instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition negated =
      MacroAssembler::negate_condition(static_cast<Assembler::Condition>($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9543
// CMoveL on rFlagsReg/cmpOp, register source, two-operand cmovq; selected
// when UseAPX is off.
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9556
// CMoveL on rFlagsReg/cmpOp using the three-operand ecmovq form; selected
// when UseAPX is on.
instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9569
// CMoveL (rFlagsReg/cmpOp) with the LoadL folded in: cmovq takes its source
// from memory.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9581
// rFlagsRegU/cmpOpU counterpart of the CMoveL setb rule: immL1 operand $src,
// predicate requires in(2)->in(2) to be the long constant 0, emitted as one
// setb on the negated condition.
instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition negated =
      MacroAssembler::negate_condition(static_cast<Assembler::Condition>($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9595
// CMoveL on rFlagsRegU/cmpOpU, register source, two-operand cmovq; selected
// when UseAPX is off.
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9608
// CMoveL on rFlagsRegU/cmpOpU using the three-operand ecmovq form; selected
// when UseAPX is on.
instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9621
// rFlagsRegUCF/cmpOpUCF counterpart of the CMoveL setb rule: immL1 operand
// $src, predicate requires in(2)->in(2) to be the long constant 0, emitted
// as one setb on the negated condition.
instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition negated =
      MacroAssembler::negate_condition(static_cast<Assembler::Condition>($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9635
// rFlagsRegUCFE/cmpOpUCFE counterpart of the CMoveL setb rule: immL1 operand
// $src, predicate requires in(2)->in(2) to be the long constant 0, emitted
// as one setb on the negated condition.
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    const Assembler::Condition negated =
      MacroAssembler::negate_condition(static_cast<Assembler::Condition>($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9649
// CMoveL on rFlagsRegUCF/cmpOpUCF: has no encoding of its own and expands to
// cmovL_regU with the same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  ins_cost(200);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
9658
// CMoveL on rFlagsRegUCFE/cmpOpUCFE using the three-operand ecmovq form.
// Carries no predicate of its own.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9670
// CMoveL on cmpOpUCF2 when the Bool test is BoolTest::ne: the move is done
// by two cmovq instructions, one on parity and one on notEqual.
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9684
// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove: this rule matches BoolTest::eq with (Binary src dst) and
// then emits the same parity / not-equal cmovq pair as the ne rule.
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_cost(200); // XXX
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9700
// CMoveL under cmpOpU / rFlagsRegU whose second value is a LoadL: the load is
// folded into the cmovq as a memory source operand.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_cost(200); // XXX
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9712
// CMoveL with a LoadL value under UCF flags. Carries no encoding of its own:
// it expands into cmovL_memU with the same operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
9721
// CMoveL with a LoadL value under UCFE flags: the load is folded into the
// cmovq as a memory source operand.
instruct cmovL_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_cost(200); // XXX
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9732
// CMoveF under cmpOp / rFlagsReg. Emitted as a short branch on the inverted
// condition that jumps over a movflt, not as a conditional-move instruction.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_cost(200); // XXX
  ins_encode %{
    Label done;
    // The branch uses the CMOV condition code with its low bit flipped,
    // which inverts the sense of the test.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    __ jccb(skip_cc, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9750
// CMoveF under cmpOpU / rFlagsRegU. Emitted as a short branch on the inverted
// condition that jumps over a movflt, not as a conditional-move instruction.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_cost(200); // XXX
  ins_encode %{
    Label done;
    // The branch uses the CMOV condition code with its low bit flipped,
    // which inverts the sense of the test.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    __ jccb(skip_cc, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9768
// CMoveF under UCF flags. Carries no encoding of its own: it expands into
// cmovF_regU with the same operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
9777
// CMoveF under UCFE flags. Emitted as a short branch on the inverted
// condition that jumps over a movflt, not as a conditional-move instruction.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_cost(200); // XXX
  ins_encode %{
    Label done;
    // The branch uses the CMOV condition code with its low bit flipped,
    // which inverts the sense of the test.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    __ jccb(skip_cc, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9795
// CMoveD under cmpOp / rFlagsReg. Emitted as a short branch on the inverted
// condition that jumps over a movdbl, not as a conditional-move instruction.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_cost(200); // XXX
  ins_encode %{
    Label done;
    // The branch uses the CMOV condition code with its low bit flipped,
    // which inverts the sense of the test.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    __ jccb(skip_cc, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9813
// CMoveD under cmpOpU / rFlagsRegU. Emitted as a short branch on the inverted
// condition that jumps over a movdbl, not as a conditional-move instruction.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_cost(200); // XXX
  ins_encode %{
    Label done;
    // The branch uses the CMOV condition code with its low bit flipped,
    // which inverts the sense of the test.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    __ jccb(skip_cc, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9831
// CMoveD under UCF flags. Carries no encoding of its own: it expands into
// cmovD_regU with the same operands.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
9840
// CMoveD under UCFE flags. Emitted as a short branch on the inverted
// condition that jumps over a movdbl, not as a conditional-move instruction.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_cost(200); // XXX
  ins_encode %{
    Label done;
    // The branch uses the CMOV condition code with its low bit flipped,
    // which inverts the sense of the test.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    __ jccb(skip_cc, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9858
9859 //----------Arithmetic Instructions--------------------------------------------
9860 //----------Addition Instructions----------------------------------------------
9861
// Two-operand int add, $dst += $src, selected only when UseAPX is off.
// The flags register is declared killed and the flag() clause lists the
// status flags recorded for this instruction.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    const Register addend = $src$$Register;
    __ addl(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
9874
// Three-operand (NDD) int add, $dst = $src1 + $src2, selected only when
// UseAPX is on. Both source operands are flagged as NDD-demotable.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ eaddl(sum, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
9888
// Two-operand int add of an immI constant into $dst, selected only when
// UseAPX is off.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addl(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
9902
// Three-operand (NDD) int add of a register and an immI constant,
// $dst = $src1 + $src2, selected only when UseAPX is on. Only the first
// source operand is flagged as NDD-demotable.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddl(sum, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
9916
// Int add whose second input is a LoadI: the load is folded into addl as a
// memory source operand.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_cost(150); // XXX
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addl(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9930
// Read-modify-write int add: matches StoreI(dst, AddI(LoadI dst, src)) on the
// same memory operand and emits one addl with a memory destination.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_cost(150); // XXX
  ins_encode %{
    const Register addend = $src$$Register;
    __ addl($dst$$Address, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
9944
// Read-modify-write int add of an immI constant: matches
// StoreI(dst, AddI(LoadI dst, src)) and emits one addl with a memory
// destination and an immediate source.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_cost(125); // XXX
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
9959
// Int increment in place: matches AddI of $dst with an immI_1 operand and
// emits incrementl. Selected only when UseAPX is off and UseIncDec is on.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementl(counter);
  %}
  ins_pipe(ialu_reg);
%}
9972
// Three-operand (NDD) int increment: matches AddI of $src with an immI_1
// operand and emits eincl into $dst. Selected only when both UseAPX and
// UseIncDec are on.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ eincl(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
9986
// Read-modify-write int increment: matches StoreI(dst, AddI(LoadI dst, src))
// with an immI_1 operand and emits incrementl on the memory operand.
// Selected only when UseIncDec is on.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_cost(125); // XXX
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10000
// XXX why does that use AddI
// Int decrement in place: matches AddI of $dst with an immI_M1 operand and
// emits decrementl. Selected only when UseAPX is off and UseIncDec is on.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementl(counter);
  %}
  ins_pipe(ialu_reg);
%}
10014
// Three-operand (NDD) int decrement: matches AddI of $src with an immI_M1
// operand and emits edecl into $dst. Selected only when both UseAPX and
// UseIncDec are on.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ edecl(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10028
// XXX why does that use AddI
// Read-modify-write int decrement: matches StoreI(dst, AddI(LoadI dst, src))
// with an immI_M1 operand and emits decrementl on the memory operand.
// Selected only when UseIncDec is on.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_cost(125); // XXX
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10043
// Folds (index << scale) + disp into one leal that has no base register.
// Used only when VM_Version::supports_fast_2op_lea() holds. No flags effect
// is declared for this rule.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leal(result, Address(noreg, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10056
// Folds (base + index) + disp into one leal with scale times_1.
// Used only when VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leal(result, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10068
// Folds base + (index << scale) into one leal without displacement.
// The base operand is restricted to the no_rbp_r13_RegI register class.
// Used only when VM_Version::supports_fast_2op_lea() holds.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leal(result, Address($base$$Register, $index$$Register, factor));
  %}
  ins_pipe(ialu_reg_reg);
%}
10081
// Folds (base + (index << scale)) + disp into one leal.
// Used only when VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leal(result, Address($base$$Register, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10094
// Two-operand long add, $dst += $src, selected only when UseAPX is off.
// The flags register is declared killed and the flag() clause lists the
// status flags recorded for this instruction.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    const Register addend = $src$$Register;
    __ addq(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
10108
// Three-operand (NDD) long add, $dst = $src1 + $src2, selected only when
// UseAPX is on. Both source operands are flagged as NDD-demotable.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ eaddq(sum, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10122
// Two-operand long add of an immL32 constant into $dst, selected only when
// UseAPX is off.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addq(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10136
// Three-operand (NDD) long add of a register and an immL32 constant,
// $dst = $src1 + $src2, selected only when UseAPX is on. Only the first
// source operand is flagged as NDD-demotable.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddq(sum, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
10150
// Long add whose second input is a LoadL: the load is folded into addq as a
// memory source operand.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_cost(150); // XXX
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addq(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10164
// Read-modify-write long add: matches StoreL(dst, AddL(LoadL dst, src)) on
// the same memory operand and emits one addq with a memory destination.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_cost(150); // XXX
  ins_encode %{
    const Register addend = $src$$Register;
    __ addq($dst$$Address, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10178
// Read-modify-write long add of an immL32 constant: matches
// StoreL(dst, AddL(LoadL dst, src)) and emits one addq with a memory
// destination and an immediate source.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_cost(125); // XXX
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10192
// Long increment in place: matches AddL of $dst with an immL1 operand and
// emits incrementq. Selected only when UseAPX is off and UseIncDec is on.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10205
// Three-operand (NDD) long increment: matches AddL of $src with an immL1
// operand and emits eincq into $dst. Selected only when both UseAPX and
// UseIncDec are on.
//
// $src is the long input of AddL and is encoded by the 64-bit eincq, so it is
// declared as rRegL (it was previously rRegI, an int register operand, which
// disagrees with the AddL match rule and with the sibling decL_rReg_ndd).
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10219
// Read-modify-write long increment: matches StoreL(dst, AddL(LoadL dst, src))
// with an immL1 operand and emits incrementq on the memory operand.
// Selected only when UseIncDec is on.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_cost(125); // XXX
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10233
// XXX why does that use AddL
// Long decrement in place: matches AddL of $dst with an immL_M1 operand and
// emits decrementq. Selected only when UseAPX is off and UseIncDec is on.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10247
// Three-operand (NDD) long decrement: matches AddL of $src with an immL_M1
// operand and emits edecq into $dst. Selected only when both UseAPX and
// UseIncDec are on.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ edecq(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10261
// XXX why does that use AddL
// Read-modify-write long decrement: matches StoreL(dst, AddL(LoadL dst, src))
// with an immL_M1 operand and emits decrementq on the memory operand.
// Selected only when UseIncDec is on.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_cost(125); // XXX
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10276
// Folds (index << scale) + disp into one leaq that has no base register.
// Used only when VM_Version::supports_fast_2op_lea() holds. No flags effect
// is declared for this rule.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leaq(result, Address(noreg, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10289
// Folds (base + index) + disp into one leaq with scale times_1.
// Used only when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10301
// Folds base + (index << scale) into one leaq without displacement.
// The base operand is restricted to the no_rbp_r13_RegL register class.
// Used only when VM_Version::supports_fast_2op_lea() holds.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leaq(result, Address($base$$Register, $index$$Register, factor));
  %}
  ins_pipe(ialu_reg_reg);
%}
10314
// Folds (base + (index << scale)) + disp into one leaq.
// Used only when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leaq(result, Address($base$$Register, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10327
// Pointer add: matches AddP of the pointer $dst with a long register offset
// and emits addq in place.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    const Register offset = $src$$Register;
    __ addq(pointer, offset);
  %}
  ins_pipe(ialu_reg_reg);
%}
10340
// Pointer add of an immL32 constant: matches AddP of the pointer $dst with
// the immediate and emits addq in place.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    __ addq(pointer, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10353
10354 // XXX addP mem ops ????
10355
// CheckCastPP on a pointer register: declared with size(0) and an empty
// encoding, so no machine code is emitted for it.
instruct checkCastPP(rRegP dst) %{
  match(Set dst (CheckCastPP dst));

  format %{ "# checkcastPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10365
// CastPP on a pointer register: declared with size(0) and an empty encoding,
// so no machine code is emitted for it.
instruct castPP(rRegP dst) %{
  match(Set dst (CastPP dst));

  format %{ "# castPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10375
// CastII when VerifyConstraintCasts is 0: zero size, zero cost and an empty
// encoding, so no machine code is emitted for it.
instruct castII(rRegI dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  format %{ "# castII of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10387
// CastII when VerifyConstraintCasts is greater than 0: calls
// verify_int_in_range with this node's index, its int bottom type and the
// value register. The flags register is declared killed.
instruct castII_checked(rRegI dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));
  effect(KILL cr);

  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ verify_int_in_range(_idx, bottom_type()->is_int(), value);
  %}
  ins_pipe(pipe_slow);
%}
10400
// CastLL when VerifyConstraintCasts is 0: zero size, zero cost and an empty
// encoding, so no machine code is emitted for it.
instruct castLL(rRegL dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  format %{ "# castLL of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10412
// CastLL when VerifyConstraintCasts is greater than 0 and castLL_is_imm32(n)
// holds: calls verify_long_in_range with noreg as the temporary register.
// The flags register is declared killed.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));
  effect(KILL cr);

  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), value, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10425
// CastLL when VerifyConstraintCasts is greater than 0 and castLL_is_imm32(n)
// does not hold: calls verify_long_in_range with $tmp as a TEMP register.
// The flags register is declared killed.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));
  effect(KILL cr, TEMP tmp);

  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    const Register value = $dst$$Register;
    const Register scratch = $tmp$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), value, scratch);
  %}
  ins_pipe(pipe_slow);
%}
10438
// CastFF on a float register: zero size, zero cost and an empty encoding, so
// no machine code is emitted for it.
instruct castFF(regF dst) %{
  match(Set dst (CastFF dst));

  format %{ "# castFF of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10449
// CastHH on a regF operand: zero size, zero cost and an empty encoding, so
// no machine code is emitted for it.
instruct castHH(regF dst) %{
  match(Set dst (CastHH dst));

  format %{ "# castHH of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10460
// CastDD on a double register: zero size, zero cost and an empty encoding,
// so no machine code is emitted for it.
instruct castDD(regD dst) %{
  match(Set dst (CastDD dst));

  format %{ "# castDD of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10471
10472 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap, shared by CompareAndSwapP and
// WeakCompareAndSwapP; matches only when the node's barrier_data() is 0.
// Emits lock cmpxchgq followed by setcc(equal) into $res. The expected value
// lives in the rax_RegP operand, which is declared killed along with flags.
instruct compareAndSwapP(rRegI res, memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register success = $res$$Register;
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgq(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10493
// Long compare-and-swap, shared by CompareAndSwapL and WeakCompareAndSwapL.
// Emits lock cmpxchgq followed by setcc(equal) into $res. The expected value
// lives in the rax_RegL operand, which is declared killed along with flags.
instruct compareAndSwapL(rRegI res, memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register success = $res$$Register;
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgq(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10513
// Int compare-and-swap, shared by CompareAndSwapI and WeakCompareAndSwapI.
// Emits lock cmpxchgl followed by setcc(equal) into $res. The expected value
// lives in the rax_RegI operand, which is declared killed along with flags.
instruct compareAndSwapI(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register success = $res$$Register;
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgl(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10533
// Byte compare-and-swap, shared by CompareAndSwapB and WeakCompareAndSwapB.
// Emits lock cmpxchgb followed by setcc(equal) into $res. The expected value
// lives in the rax_RegI operand, which is declared killed along with flags.
instruct compareAndSwapB(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register success = $res$$Register;
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgb(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10553
// Short compare-and-swap, shared by CompareAndSwapS and WeakCompareAndSwapS.
// Emits lock cmpxchgw followed by setcc(equal) into $res. The expected value
// lives in the rax_RegI operand, which is declared killed along with flags.
instruct compareAndSwapS(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register success = $res$$Register;
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgw(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10573
// Narrow-oop compare-and-swap, shared by CompareAndSwapN and
// WeakCompareAndSwapN; matches only when the node's barrier_data() is 0.
// Emits lock cmpxchgl followed by setcc(equal) into $res. The expected value
// lives in the rax_RegN operand, which is declared killed along with flags.
instruct compareAndSwapN(rRegI res, memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register success = $res$$Register;
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgl(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10593
// Byte compare-and-exchange: emits lock cmpxchgb. The match result is bound
// to $oldval (a rax_RegI operand), so no separate result register is needed.
instruct compareAndExchangeB(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgb(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10610
// Short compare-and-exchange: emits lock cmpxchgw. The match result is bound
// to $oldval (a rax_RegI operand), so no separate result register is needed.
instruct compareAndExchangeS(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgw(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10627
// Int compare-and-exchange: emits lock cmpxchgl. The match result is bound
// to $oldval (a rax_RegI operand), so no separate result register is needed.
instruct compareAndExchangeI(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgl(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10644
// Long compare-and-exchange: emits lock cmpxchgq. The match result is bound
// to $oldval (a rax_RegL operand), so no separate result register is needed.
instruct compareAndExchangeL(memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgq(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10661
// Narrow-oop compare-and-exchange; matches only when the node's
// barrier_data() is 0. Emits lock cmpxchgl. The match result is bound to
// $oldval (a rax_RegN operand).
instruct compareAndExchangeN(memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgl(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10678
// Compare-and-exchange on a pointer (rRegP) value. Only selected when the
// LoadStore node carries no barrier data (barrier_data() == 0).
// Emits a lock prefix followed by cmpxchgq; the flags register is killed.
instruct compareAndExchangeP(memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10696
// Byte GetAndAddB whose result is unused (result_not_used()): the node is
// matched into the Universe dummy and a lock-prefixed addb of a register to
// memory is emitted. Flags are killed.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  match(Set dummy (GetAndAddB mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10708
// Byte GetAndAddB with an immediate addend whose result is unused
// (result_not_used()): emits a lock-prefixed addb of the constant to memory.
// Flags are killed.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  match(Set dummy (GetAndAddB mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10720
// Byte GetAndAddB whose result is used (!result_not_used()). Emits a
// lock-prefixed xaddb with $newval as both addend and result register, then
// calls narrow_subword_type(..., T_BYTE) on that register (the format text
// describes this as "$newval -> byte"). Flags are killed.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr)
%{
  match(Set newval (GetAndAddB mem newval));
  predicate(!n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
    __ narrow_subword_type($newval$$Register, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10733
// Short GetAndAddS whose result is unused (result_not_used()): emits a
// lock-prefixed addw of a register to memory. Flags are killed.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  match(Set dummy (GetAndAddS mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10745
// Short GetAndAddS with an immediate addend whose result is unused. Selected
// only when UseStoreImmI16 is set and result_not_used() holds; emits a
// lock-prefixed addw of the constant to memory. Flags are killed.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  match(Set dummy (GetAndAddS mem add));
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10757
// Short GetAndAddS whose result is used (!result_not_used()). Emits a
// lock-prefixed xaddw with $newval as both addend and result register, then
// calls narrow_subword_type(..., T_SHORT) on that register (the format text
// describes this as "$newval -> short"). Flags are killed.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr)
%{
  match(Set newval (GetAndAddS mem newval));
  predicate(!n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
    __ narrow_subword_type($newval$$Register, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10770
// Int GetAndAddI whose result is unused (result_not_used()): emits a
// lock-prefixed addl of a register to memory. Flags are killed.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  match(Set dummy (GetAndAddI mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10782
// Int GetAndAddI with an immediate addend whose result is unused
// (result_not_used()): emits a lock-prefixed addl of the constant to memory.
// Flags are killed.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  match(Set dummy (GetAndAddI mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10794
// Int GetAndAddI whose result is used (!result_not_used()). Emits a
// lock-prefixed xaddl with $newval as both addend and result register.
// Flags are killed.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr)
%{
  match(Set newval (GetAndAddI mem newval));
  predicate(!n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "xaddl_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10806
// Long GetAndAddL whose result is unused (result_not_used()): emits a
// lock-prefixed addq of a register to memory. Flags are killed.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr)
%{
  match(Set dummy (GetAndAddL mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10818
// Long GetAndAddL with an immL32 addend whose result is unused
// (result_not_used()): emits a lock-prefixed addq of the constant to memory.
// Flags are killed.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr)
%{
  match(Set dummy (GetAndAddL mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10830
// Long GetAndAddL whose result is used (!result_not_used()). Emits a
// lock-prefixed xaddq with $newval as both addend and result register.
// Flags are killed.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr)
%{
  match(Set newval (GetAndAddL mem newval));
  predicate(!n->as_LoadStore()->result_not_used());
  effect(KILL cr);

  format %{ "xaddq_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddq($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10842
// Byte GetAndSetB: emits xchgb between $newval and memory, then calls
// narrow_subword_type(..., T_BYTE) on the result register (the format text
// describes this as "$newval -> byte"). No lock prefix is emitted here and no
// flags effect is declared.
instruct xchgB(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetB mem newval));

  format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
    __ narrow_subword_type($newval$$Register, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10852
// Short GetAndSetS: emits xchgw between $newval and memory, then calls
// narrow_subword_type(..., T_SHORT) on the result register (the format text
// describes this as "$newval -> short"). No flags effect is declared.
instruct xchgS(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetS mem newval));

  format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
    __ narrow_subword_type($newval$$Register, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10862
// Int GetAndSetI: emits xchgl between $newval and memory; $newval receives
// the node's result. No flags effect is declared.
instruct xchgI(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetI mem newval));

  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10871
// Long GetAndSetL: emits xchgq between $newval and memory; $newval receives
// the node's result. No flags effect is declared.
// The format text names XCHGQ so that the printed assembly agrees with the
// 64-bit xchgq that is actually encoded (it previously read "XCHGL").
instruct xchgL(memory mem, rRegL newval)
%{
  match(Set newval (GetAndSetL mem newval));

  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10880
// Pointer GetAndSetP: emits xchgq between $newval and memory. Only selected
// when the LoadStore node carries no barrier data (barrier_data() == 0).
instruct xchgP(memory mem, rRegP newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetP mem newval));

  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10890
// Narrow-pointer GetAndSetN: emits xchgl between $newval and memory. Only
// selected when the LoadStore node carries no barrier data
// (barrier_data() == 0).
// The format text brackets the memory operand as "[$mem]", matching the
// other xchg forms (the opening bracket was previously missing).
instruct xchgN(memory mem, rRegN newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetN mem newval));

  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10900
10901 //----------Abs Instructions-------------------------------------------
10902
// Integer Absolute Instructions
// AbsI: zero $dst, subtract $src from it, then conditionally move $src into
// $dst on Assembler::less. $dst is declared TEMP, so it is written before
// $src has been fully consumed; flags are killed.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);

  format %{ "xorl $dst, $dst\t# abs int\n\t"
            "subl $dst, $src\n\t"
            "cmovll $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subl($dst$$Register, $src$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10919
// Long Absolute Instructions
// AbsL: zero $dst (with a 32-bit xorl), subtract $src from it with subq, then
// conditionally move $src into $dst with cmovq on Assembler::less. $dst is
// declared TEMP; flags are killed.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);

  format %{ "xorl $dst, $dst\t# abs long\n\t"
            "subq $dst, $src\n\t"
            "cmovlq $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subq($dst$$Register, $src$$Register);
    __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10936
10937 //----------Subtraction Instructions-------------------------------------------
10938
// Integer Subtraction Instructions
// Two-operand int subtract ($dst -= $src), used when APX is disabled.
// Kills the flags register and carries the PD::Flag_sets_* attributes
// listed in flag().
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10953
// Three-operand int subtract ($dst = $src1 - $src2) via esubl, used when
// UseAPX is set. Kills flags; additionally tagged Flag_ndd_demotable_opr1.
instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10967
// Three-operand int subtract of an immediate ($dst = $src1 - $src2) via
// esubl, used when UseAPX is set. Kills flags; tagged
// Flag_ndd_demotable_opr1.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10981
// Int subtract with a memory source: folds the LoadI into subl
// ($dst -= [$src]). Cost 150; kills flags.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10995
// Int read-modify-write subtract: matches StoreI(dst, LoadI(dst) - src) and
// emits a single subl with a memory destination. Cost 150; kills flags.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11009
// Two-operand long subtract ($dst -= $src) via subq, used when APX is
// disabled. Kills flags and carries the PD::Flag_sets_* attributes listed.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11023
// Three-operand long subtract ($dst = $src1 - $src2) via esubq, used when
// UseAPX is set. Kills flags; tagged Flag_ndd_demotable_opr1.
instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11037
// Three-operand long subtract of an immL32 ($dst = $src1 - $src2) via esubq,
// used when UseAPX is set. Kills flags; tagged Flag_ndd_demotable_opr1.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11051
// Long subtract with a memory source: folds the LoadL into subq
// ($dst -= [$src]). Cost 150; kills flags.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11065
// Long read-modify-write subtract: matches StoreL(dst, LoadL(dst) - src) and
// emits a single subq with a memory destination. Cost 150; kills flags.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11079
// Subtract from a pointer
// XXX hmpf???
// Matches AddP(dst, SubI(0, src)), i.e. a pointer plus a negated int, and
// emits subq $dst, $src. Flags are killed.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq $dst, $src\t# ptr - int" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11093
// Int negate expressed as SubI(0, dst): emits negl in place, used when APX
// is disabled. Kills flags; carries the overflow/sign/zero/parity flag
// attributes listed in flag().
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11107
// Int negate expressed as SubI(0, src) with a separate destination: emits
// enegl $dst, $src when UseAPX is set. Kills flags; tagged
// Flag_ndd_demotable_opr2.
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11121
// Int negate expressed as a NegI node: emits negl in place, used when APX is
// disabled. Kills flags.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11135
// Int negate expressed as a NegI node with a separate destination: emits
// enegl $dst, $src when UseAPX is set. Kills flags; tagged
// Flag_ndd_demotable_opr1.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11149
// Int negate in memory: matches StoreI(dst, 0 - LoadI(dst)) and emits a
// single negl on the memory operand. Kills flags.
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11162
// Long negate expressed as SubL(0, dst): emits negq in place, used when APX
// is disabled. Kills flags.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11176
// Long negate expressed as SubL(0, src) with a separate destination: emits
// enegq $dst, $src when UseAPX is set. Kills flags; tagged
// Flag_ndd_demotable_opr2.
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11190
// Long negate expressed as a NegL node: emits negq in place, used when APX
// is disabled. Kills flags.
// The format annotation reads "# long" because the operand is an rRegL and
// the encoding is the 64-bit negq (it previously said "# int").
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11204
// Long negate expressed as a NegL node with a separate destination: emits
// enegq $dst, $src when UseAPX is set. Kills flags; tagged
// Flag_ndd_demotable_opr1.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11218
// Long negate in memory: matches StoreL(dst, 0 - LoadL(dst)) and emits a
// single negq on the memory operand. Kills flags.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11231
11232 //----------Multiplication/Division Instructions-------------------------------
11233 // Integer Multiplication Instructions
11234 // Multiply Register
11235
// Two-operand int multiply ($dst *= $src) via imull, used when APX is
// disabled. Cost 300; kills flags.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  ins_cost(300);
  effect(KILL cr);

  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11249
// Three-operand int multiply ($dst = $src1 * $src2) via eimull, used when
// UseAPX is set. Cost 300; kills flags; tagged Flag_ndd_demotable_opr1 and
// Flag_ndd_demotable_opr2.
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  ins_cost(300);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11264
// Int multiply of a register by an immediate into a separate destination
// ($dst = $src * $imm) via the three-operand imull. Cost 300; kills flags.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr) %{
  match(Set dst (MulI src imm));
  ins_cost(300);
  effect(KILL cr);

  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11277
// Int multiply with a memory source: folds the LoadI into imull
// ($dst *= [$src]). Cost 350; kills flags.
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  ins_cost(350);
  effect(KILL cr);

  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11290
// Int multiply of a loaded value by an immediate ($dst = [$src] * $imm) via
// the three-operand imull with a memory source. Cost 300; kills flags.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  ins_cost(300);
  effect(KILL cr);

  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11303
// MulAddS2I: computes dst*src1 + src2*src3 by expanding into two mulI_rReg
// instructions followed by addI_rReg. $src2 is overwritten by the second
// product (declared KILL), and flags are killed.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr) %{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{
    mulI_rReg(dst, src1, cr);
    mulI_rReg(src2, src3, cr);
    addI_rReg(dst, src2, cr);
  %}
%}
11313
// Two-operand long multiply ($dst *= $src) via imulq, used when APX is
// disabled. Cost 300; kills flags.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  ins_cost(300);
  effect(KILL cr);

  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11327
// Three-operand long multiply ($dst = $src1 * $src2) via eimulq, used when
// UseAPX is set. Cost 300; kills flags; tagged Flag_ndd_demotable_opr1 and
// Flag_ndd_demotable_opr2.
instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  ins_cost(300);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11342
// Long multiply of a register by an immL32 into a separate destination
// ($dst = $src * $imm) via the three-operand imulq. Cost 300; kills flags.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr) %{
  match(Set dst (MulL src imm));
  ins_cost(300);
  effect(KILL cr);

  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11355
// Long multiply with a memory source: folds the LoadL into imulq
// ($dst *= [$src]). Cost 350; kills flags.
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (MulL dst (LoadL src)));
  ins_cost(350);
  effect(KILL cr);

  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11368
11369
// Long multiply of a loaded value by an immL32 ($dst = [$src] * $imm) via
// the three-operand imulq with a memory source. Cost 300; kills flags.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr) %{
  match(Set dst (MulL (LoadL src) imm));
  ins_cost(300);
  effect(KILL cr);

  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11382
// Signed MulHiL: one-operand imulq of $src, with the other factor in the
// rax_RegL operand and the result bound to the rdx_RegL operand (the format
// text shows the product as RDX:RAX). rax is consumed and clobbered
// (USE_KILL); flags are killed. Cost 300.
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr) %{
  match(Set dst (MulHiL src rax));
  ins_cost(300);
  effect(USE_KILL rax, KILL cr);

  format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11395
// Unsigned UMulHiL: one-operand mulq of $src, with the other factor in the
// rax_RegL operand and the result bound to the rdx_RegL operand (the format
// text shows the product as RDX:RAX). rax is consumed and clobbered
// (USE_KILL); flags are killed. Cost 300.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr) %{
  match(Set dst (UMulHiL src rax));
  ins_cost(300);
  effect(USE_KILL rax, KILL cr);

  format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11408
// Signed int divide: dividend and quotient in the rax_RegI operand, divisor
// in a register that is neither rax nor rdx. rdx and flags are killed.
// Code is produced by the cdql_enc encoding class (defined elsewhere in this
// file); the format text below shows the intended sequence, which special-
// cases 0x80000000 / -1 before falling through to cdql + idivl.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(Set rax (DivI rax div));
  ins_cost(30*100+10*100); // XXX
  effect(KILL rdx, KILL cr);

  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11427
// Signed long divide: dividend and quotient in the rax_RegL operand, divisor
// in a register that is neither rax nor rdx. rdx and flags are killed.
// Code is produced by the cdqq_enc encoding class (defined elsewhere in this
// file); the format text below shows the intended sequence, which special-
// cases 0x8000000000000000 / -1 before falling through to cdqq + idivq.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rax (DivL rax div));
  ins_cost(30*100+10*100); // XXX
  effect(KILL rdx, KILL cr);

  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11447
// Unsigned int divide (UDivI): dividend and quotient in the rax_RegI
// operand. Delegates to the udivI assembler helper, passing rax, the divisor
// and rdx; rdx and flags are killed. Cost 300.
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(Set rax (UDivI rax div));
  ins_cost(300);
  effect(KILL rdx, KILL cr);

  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11460
// Unsigned long divide (UDivL): dividend and quotient in the rax_RegL
// operand. Delegates to the udivL assembler helper, passing rax, the divisor
// and rdx; rdx and flags are killed. Cost 300.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rax (UDivL rax div));
  ins_cost(300);
  effect(KILL rdx, KILL cr);

  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
    __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11473
// Integer DIVMOD with Register, both quotient and mod results
// Matches the DivModI node directly (no Set), with rax and rdx as fixed
// register operands and the divisor in any other register. Only the flags
// are declared killed. Uses the same cdql_enc encoding as the plain divide.
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(DivModI rax div);
  ins_cost(30*100+10*100); // XXX
  effect(KILL cr);

  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}
11493
// Long DIVMOD with Register, both quotient and mod results
// Matches the DivModL node directly (no Set), with rax and rdx as fixed
// register operands and the divisor in any other register. Only the flags
// are declared killed. Uses the same cdqq_enc encoding as the plain divide.
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(DivModL rax div);
  ins_cost(30*100+10*100); // XXX
  effect(KILL cr);

  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}
11514
// Unsigned integer DIVMOD with Register, both quotient and mod results
// Matches UDivModI directly and delegates to the udivmodI assembler helper,
// passing rax, the divisor, rdx and a scratch register ($tmp, declared TEMP
// and constrained to be neither rax nor rdx). Flags are killed. Cost 300.
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(UDivModI rax div);
  ins_cost(300);
  effect(TEMP tmp, KILL cr);

  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
         %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11531
// Unsigned long DIVMOD with Register, both quotient and mod results
// Matches UDivModL directly and delegates to the udivmodL assembler helper,
// passing rax, the divisor, rdx and a scratch register ($tmp, declared TEMP
// and constrained to be neither rax nor rdx). Flags are killed. Cost 300.
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(UDivModL rax div);
  ins_cost(300);
  effect(TEMP tmp, KILL cr);

  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
         %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11548
// Signed int remainder: dividend in the rax_RegI operand, result bound to
// the rdx_RegI operand, divisor in a register that is neither. rax and flags
// are killed. Shares the cdql_enc encoding with the int divide; the format
// text shows the intended sequence.
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  ins_cost(300); // XXX
  effect(KILL rax, KILL cr);

  format %{ "cmpl rax, 0x80000000\t# irem\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11567
// Signed long remainder: dividend in the rax_RegL operand, result bound to
// the rdx_RegL operand, divisor in a register that is neither. rax and flags
// are killed. Shares the cdqq_enc encoding with the long divide; the format
// text shows the intended sequence.
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rdx (ModL rax div));
  ins_cost(300); // XXX
  effect(KILL rax, KILL cr);

  format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11587
// Unsigned int remainder (UModI): result bound to the rdx_RegI operand.
// Delegates to the umodI assembler helper, passing rax, the divisor and rdx;
// rax and flags are killed. Cost 300.
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(Set rdx (UModI rax div));
  ins_cost(300);
  effect(KILL rax, KILL cr);

  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11600
// Unsigned long remainder (UModL): result bound to the rdx_RegL operand.
// Delegates to the umodL assembler helper, passing rax, the divisor and rdx;
// rax and flags are killed. Cost 300.
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rdx (UModL rax div));
  ins_cost(300);
  effect(KILL rax, KILL cr);

  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11613
11614 // Integer Shift Instructions
// Shift Left by one, two, three
// In-place int left shift by an immI2 constant via sall, used when APX is
// disabled. Kills flags.
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11628
// Shift Left by one, two, three
// Int left shift by an immI2 constant into a separate destination via esall,
// used when UseAPX is set. Kills flags; tagged Flag_ndd_demotable_opr1.
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11643
// Shift Left by 8-bit immediate
// In-place int left shift by an immI8 constant via sall, used when APX is
// disabled. Kills flags.
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11657
// Shift Left by 8-bit immediate
// Int left shift by an immI8 constant into a separate destination via esall,
// used when UseAPX is set. Kills flags; tagged Flag_ndd_demotable_opr1.
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11672
// Shift Left by 8-bit immediate
// Read-modify-write form: matches StoreI(dst, LoadI(dst) << shift) and emits
// a single sall on the memory operand. Kills flags.
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11685
// Shift Left by variable
// In-place int left shift with the count bound to the rcx_RegI operand;
// selected only when BMI2 is not supported. The one-argument sall takes no
// explicit count. Kills flags.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11699
11700 // Shift Left by variable
// Read-modify-write form, selected only when BMI2 is not supported: matches a
// StoreI of ((LoadI dst) << shift) back to the same memory operand. The count
// is pinned to rcx_RegI and sall() is given only the address. The flags
// register is declared killed.
11701 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11702 %{
11703 predicate(!VM_Version::supports_bmi2());
11704 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11705 effect(KILL cr);
11706
11707 format %{ "sall $dst, $shift" %}
11708 ins_encode %{
11709 __ sall($dst$$Address);
11710 %}
11711 ins_pipe(ialu_mem_reg);
11712 %}
11713
// Shift Left by variable, BMI2 form (selected only when
// VM_Version::supports_bmi2()): three-operand shlxl with the count in any
// rRegI register. No flags effect is declared.
11714 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11715 %{
11716 predicate(VM_Version::supports_bmi2());
11717 match(Set dst (LShiftI src shift));
11718
11719 format %{ "shlxl $dst, $src, $shift" %}
11720 ins_encode %{
11721 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11722 %}
11723 ins_pipe(ialu_reg_reg);
11724 %}
11725
// Shift Left by variable, BMI2 form with the shifted value loaded from memory:
// matches (LShiftI (LoadI src) shift) and emits shlxl with an address source.
// ins_cost is 175; no flags effect is declared.
11726 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11727 %{
11728 predicate(VM_Version::supports_bmi2());
11729 match(Set dst (LShiftI (LoadI src) shift));
11730 ins_cost(175);
11731 format %{ "shlxl $dst, $src, $shift" %}
11732 ins_encode %{
11733 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11734 %}
11735 ins_pipe(ialu_reg_mem);
11736 %}
11737
11738 // Arithmetic Shift Right by 8-bit immediate
// Legacy two-operand form, selected only when UseAPX is off: shifts $dst in
// place with sarl by the immediate count. The flags register is declared killed.
// Uses the register pipeline class (ialu_reg), like the other
// register-immediate shift forms such as salI_rReg_imm and shrI_rReg_imm; this
// instruction has no memory operand.
11739 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11740 %{
11741 predicate(!UseAPX);
11742 match(Set dst (RShiftI dst shift));
11743 effect(KILL cr);
11744
11745 format %{ "sarl $dst, $shift" %}
11746 ins_encode %{
11747 __ sarl($dst$$Register, $shift$$constant);
11748 %}
11749 ins_pipe(ialu_reg);
11750 %}
11751
11752 // Arithmetic Shift Right by 8-bit immediate
// APX NDD form (selected only when UseAPX is set): matches (RShiftI src shift)
// and emits esarl with separate dst and src registers. The flags register is
// declared killed; operand 1 is flagged NDD-demotable.
// Uses the register pipeline class (ialu_reg), like the other
// register-immediate NDD shift forms such as salI_rReg_imm_ndd; this
// instruction has no memory operand.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
11753 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11754 %{
11755 predicate(UseAPX);
11756 match(Set dst (RShiftI src shift));
11757 effect(KILL cr);
11758 flag(PD::Flag_ndd_demotable_opr1);
11759
11760 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11761 ins_encode %{
11762 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11763 %}
11764 ins_pipe(ialu_reg);
11765 %}
11766
11767 // Arithmetic Shift Right by 8-bit immediate
// Read-modify-write form: matches a StoreI of ((LoadI dst) >> shift) back to
// the same memory operand and emits one sarl on that address. It carries no
// predicate. The flags register is declared killed.
11768 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11769 %{
11770 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11771 effect(KILL cr);
11772
11773 format %{ "sarl $dst, $shift" %}
11774 ins_encode %{
11775 __ sarl($dst$$Address, $shift$$constant);
11776 %}
11777 ins_pipe(ialu_mem_imm);
11778 %}
11779
11780 // Arithmetic Shift Right by variable
// Selected only when BMI2 is not supported. The count operand is pinned to the
// rcx_RegI register class and is not passed to the assembler: sarl() is given
// only $dst (per the instruct name, the count is taken from CL). The flags
// register is declared killed.
11781 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11782 %{
11783 predicate(!VM_Version::supports_bmi2());
11784 match(Set dst (RShiftI dst shift));
11785 effect(KILL cr);
11786
11787 format %{ "sarl $dst, $shift" %}
11788 ins_encode %{
11789 __ sarl($dst$$Register);
11790 %}
11791 ins_pipe(ialu_reg_reg);
11792 %}
11793
11794 // Arithmetic Shift Right by variable
// Read-modify-write form, selected only when BMI2 is not supported: matches a
// StoreI of ((LoadI dst) >> shift) back to the same memory operand. The count
// is pinned to rcx_RegI and sarl() is given only the address. The flags
// register is declared killed.
11795 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11796 %{
11797 predicate(!VM_Version::supports_bmi2());
11798 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11799 effect(KILL cr);
11800
11801 format %{ "sarl $dst, $shift" %}
11802 ins_encode %{
11803 __ sarl($dst$$Address);
11804 %}
11805 ins_pipe(ialu_mem_reg);
11806 %}
11807
// Arithmetic Shift Right by variable, BMI2 form (selected only when
// VM_Version::supports_bmi2()): three-operand sarxl with the count in any
// rRegI register. No flags effect is declared.
11808 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11809 %{
11810 predicate(VM_Version::supports_bmi2());
11811 match(Set dst (RShiftI src shift));
11812
11813 format %{ "sarxl $dst, $src, $shift" %}
11814 ins_encode %{
11815 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11816 %}
11817 ins_pipe(ialu_reg_reg);
11818 %}
11819
// Arithmetic Shift Right by variable, BMI2 form with the shifted value loaded
// from memory: matches (RShiftI (LoadI src) shift) and emits sarxl with an
// address source. ins_cost is 175; no flags effect is declared.
11820 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11821 %{
11822 predicate(VM_Version::supports_bmi2());
11823 match(Set dst (RShiftI (LoadI src) shift));
11824 ins_cost(175);
11825 format %{ "sarxl $dst, $src, $shift" %}
11826 ins_encode %{
11827 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11828 %}
11829 ins_pipe(ialu_reg_mem);
11830 %}
11831
11832 // Logical Shift Right by 8-bit immediate
// Legacy two-operand form, selected only when UseAPX is off: shifts $dst in
// place with shrl by the immediate count. The flags register is declared killed.
11833 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11834 %{
11835 predicate(!UseAPX);
11836 match(Set dst (URShiftI dst shift));
11837 effect(KILL cr);
11838
11839 format %{ "shrl $dst, $shift" %}
11840 ins_encode %{
11841 __ shrl($dst$$Register, $shift$$constant);
11842 %}
11843 ins_pipe(ialu_reg);
11844 %}
11845
11846 // Logical Shift Right by 8-bit immediate
// APX NDD form (selected only when UseAPX is set): matches (URShiftI src shift)
// and emits eshrl with separate dst and src registers. The flags register is
// declared killed; operand 1 is flagged NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
11847 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11848 %{
11849 predicate(UseAPX);
11850 match(Set dst (URShiftI src shift));
11851 effect(KILL cr);
11852 flag(PD::Flag_ndd_demotable_opr1);
11853
11854 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
11855 ins_encode %{
11856 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
11857 %}
11858 ins_pipe(ialu_reg);
11859 %}
11860
11861 // Logical Shift Right by 8-bit immediate
// Read-modify-write form: matches a StoreI of ((LoadI dst) >>> shift) back to
// the same memory operand and emits one shrl on that address. It carries no
// predicate. The flags register is declared killed.
11862 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11863 %{
11864 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
11865 effect(KILL cr);
11866
11867 format %{ "shrl $dst, $shift" %}
11868 ins_encode %{
11869 __ shrl($dst$$Address, $shift$$constant);
11870 %}
11871 ins_pipe(ialu_mem_imm);
11872 %}
11873
11874 // Logical Shift Right by variable
// Selected only when BMI2 is not supported. The count operand is pinned to the
// rcx_RegI register class and is not passed to the assembler: shrl() is given
// only $dst (per the instruct name, the count is taken from CL). The flags
// register is declared killed.
11875 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11876 %{
11877 predicate(!VM_Version::supports_bmi2());
11878 match(Set dst (URShiftI dst shift));
11879 effect(KILL cr);
11880
11881 format %{ "shrl $dst, $shift" %}
11882 ins_encode %{
11883 __ shrl($dst$$Register);
11884 %}
11885 ins_pipe(ialu_reg_reg);
11886 %}
11887
11888 // Logical Shift Right by variable
// Read-modify-write form, selected only when BMI2 is not supported: matches a
// StoreI of ((LoadI dst) >>> shift) back to the same memory operand. The count
// is pinned to rcx_RegI and shrl() is given only the address. The flags
// register is declared killed.
11889 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11890 %{
11891 predicate(!VM_Version::supports_bmi2());
11892 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
11893 effect(KILL cr);
11894
11895 format %{ "shrl $dst, $shift" %}
11896 ins_encode %{
11897 __ shrl($dst$$Address);
11898 %}
11899 ins_pipe(ialu_mem_reg);
11900 %}
11901
// Logical Shift Right by variable, BMI2 form (selected only when
// VM_Version::supports_bmi2()): three-operand shrxl with the count in any
// rRegI register. No flags effect is declared.
11902 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11903 %{
11904 predicate(VM_Version::supports_bmi2());
11905 match(Set dst (URShiftI src shift));
11906
11907 format %{ "shrxl $dst, $src, $shift" %}
11908 ins_encode %{
11909 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
11910 %}
11911 ins_pipe(ialu_reg_reg);
11912 %}
11913
// Logical Shift Right by variable, BMI2 form with the shifted value loaded
// from memory: matches (URShiftI (LoadI src) shift) and emits shrxl with an
// address source. ins_cost is 175; no flags effect is declared.
11914 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
11915 %{
11916 predicate(VM_Version::supports_bmi2());
11917 match(Set dst (URShiftI (LoadI src) shift));
11918 ins_cost(175);
11919 format %{ "shrxl $dst, $src, $shift" %}
11920 ins_encode %{
11921 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
11922 %}
11923 ins_pipe(ialu_reg_mem);
11924 %}
11925
11926 // Long Shift Instructions
11927 // Shift Left by one, two, three
// Long, legacy two-operand form, selected only when UseAPX is off: shifts $dst
// in place with salq by an immI2 count. The flags register is declared killed.
11928 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
11929 %{
11930 predicate(!UseAPX);
11931 match(Set dst (LShiftL dst shift));
11932 effect(KILL cr);
11933
11934 format %{ "salq $dst, $shift" %}
11935 ins_encode %{
11936 __ salq($dst$$Register, $shift$$constant);
11937 %}
11938 ins_pipe(ialu_reg);
11939 %}
11940
11941 // Shift Left by one, two, three
// Long APX NDD form (selected only when UseAPX is set): matches
// (LShiftL src shift) with an immI2 count and emits esalq with separate dst and
// src registers. The flags register is declared killed; operand 1 is flagged
// NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
11942 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
11943 %{
11944 predicate(UseAPX);
11945 match(Set dst (LShiftL src shift));
11946 effect(KILL cr);
11947 flag(PD::Flag_ndd_demotable_opr1);
11948
11949 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
11950 ins_encode %{
11951 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
11952 %}
11953 ins_pipe(ialu_reg);
11954 %}
11955
11956 // Shift Left by 8-bit immediate
// Long, legacy two-operand form, selected only when UseAPX is off: shifts $dst
// in place with salq by the immediate count. The flags register is declared
// killed.
11957 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
11958 %{
11959 predicate(!UseAPX);
11960 match(Set dst (LShiftL dst shift));
11961 effect(KILL cr);
11962
11963 format %{ "salq $dst, $shift" %}
11964 ins_encode %{
11965 __ salq($dst$$Register, $shift$$constant);
11966 %}
11967 ins_pipe(ialu_reg);
11968 %}
11969
11970 // Shift Left by 8-bit immediate
// Long APX NDD form (selected only when UseAPX is set): matches
// (LShiftL src shift) and emits esalq with separate dst and src registers.
// The flags register is declared killed; operand 1 is flagged NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
11971 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
11972 %{
11973 predicate(UseAPX);
11974 match(Set dst (LShiftL src shift));
11975 effect(KILL cr);
11976 flag(PD::Flag_ndd_demotable_opr1);
11977
11978 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
11979 ins_encode %{
11980 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
11981 %}
11982 ins_pipe(ialu_reg);
11983 %}
11984
11985 // Shift Left by 8-bit immediate
// Long read-modify-write form: matches a StoreL of ((LoadL dst) << shift) back
// to the same memory operand and emits one salq on that address. It carries no
// predicate. The flags register is declared killed.
11986 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11987 %{
11988 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
11989 effect(KILL cr);
11990
11991 format %{ "salq $dst, $shift" %}
11992 ins_encode %{
11993 __ salq($dst$$Address, $shift$$constant);
11994 %}
11995 ins_pipe(ialu_mem_imm);
11996 %}
11997
11998 // Shift Left by variable
// Long form, selected only when BMI2 is not supported. The int count operand is
// pinned to the rcx_RegI register class and is not passed to the assembler:
// salq() is given only $dst (per the instruct name, the count is taken from
// CL). The flags register is declared killed.
11999 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12000 %{
12001 predicate(!VM_Version::supports_bmi2());
12002 match(Set dst (LShiftL dst shift));
12003 effect(KILL cr);
12004
12005 format %{ "salq $dst, $shift" %}
12006 ins_encode %{
12007 __ salq($dst$$Register);
12008 %}
12009 ins_pipe(ialu_reg_reg);
12010 %}
12011
12012 // Shift Left by variable
// Long read-modify-write form, selected only when BMI2 is not supported:
// matches a StoreL of ((LoadL dst) << shift) back to the same memory operand.
// The count is pinned to rcx_RegI and salq() is given only the address. The
// flags register is declared killed.
12013 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12014 %{
12015 predicate(!VM_Version::supports_bmi2());
12016 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12017 effect(KILL cr);
12018
12019 format %{ "salq $dst, $shift" %}
12020 ins_encode %{
12021 __ salq($dst$$Address);
12022 %}
12023 ins_pipe(ialu_mem_reg);
12024 %}
12025
// Long Shift Left by variable, BMI2 form (selected only when
// VM_Version::supports_bmi2()): three-operand shlxq with the count in any
// rRegI register. No flags effect is declared.
12026 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12027 %{
12028 predicate(VM_Version::supports_bmi2());
12029 match(Set dst (LShiftL src shift));
12030
12031 format %{ "shlxq $dst, $src, $shift" %}
12032 ins_encode %{
12033 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12034 %}
12035 ins_pipe(ialu_reg_reg);
12036 %}
12037
// Long Shift Left by variable, BMI2 form with the shifted value loaded from
// memory: matches (LShiftL (LoadL src) shift) and emits shlxq with an address
// source. ins_cost is 175; no flags effect is declared.
12038 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12039 %{
12040 predicate(VM_Version::supports_bmi2());
12041 match(Set dst (LShiftL (LoadL src) shift));
12042 ins_cost(175);
12043 format %{ "shlxq $dst, $src, $shift" %}
12044 ins_encode %{
12045 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12046 %}
12047 ins_pipe(ialu_reg_mem);
12048 %}
12049
12050 // Arithmetic Shift Right by immediate (count masked to the low 6 bits)
// Long, legacy two-operand form, selected only when UseAPX is off: shifts $dst
// in place with sarq. The shift operand is an immI (not immI8); the encoding
// masks it with 0x3F and narrows it to unsigned char before emitting.
// The flags register is declared killed.
// Uses the register pipeline class (ialu_reg), like the other
// register-immediate long shift forms such as salL_rReg_imm and shrL_rReg_imm;
// this instruction has no memory operand.
12051 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12052 %{
12053 predicate(!UseAPX);
12054 match(Set dst (RShiftL dst shift));
12055 effect(KILL cr);
12056
12057 format %{ "sarq $dst, $shift" %}
12058 ins_encode %{
12059 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12060 %}
12061 ins_pipe(ialu_reg);
12062 %}
12063
12064 // Arithmetic Shift Right by immediate (count masked to the low 6 bits)
// Long APX NDD form (selected only when UseAPX is set): matches
// (RShiftL src shift) and emits esarq with separate dst and src registers.
// The shift operand is an immI (not immI8); the encoding masks it with 0x3F and
// narrows it to unsigned char. The flags register is declared killed; operand 1
// is flagged NDD-demotable.
// Uses the register pipeline class (ialu_reg), like the other
// register-immediate long NDD shift forms such as salL_rReg_imm_ndd; this
// instruction has no memory operand.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
12065 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12066 %{
12067 predicate(UseAPX);
12068 match(Set dst (RShiftL src shift));
12069 effect(KILL cr);
12070 flag(PD::Flag_ndd_demotable_opr1);
12071
12072 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12073 ins_encode %{
12074 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12075 %}
12076 ins_pipe(ialu_reg);
12077 %}
12078
12079 // Arithmetic Shift Right by immediate (count masked to the low 6 bits)
// Long read-modify-write form: matches a StoreL of ((LoadL dst) >> shift) back
// to the same memory operand and emits one sarq on that address. The shift
// operand is an immI; the encoding masks it with 0x3F and narrows it to
// unsigned char. It carries no predicate. The flags register is declared killed.
12080 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12081 %{
12082 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12083 effect(KILL cr);
12084
12085 format %{ "sarq $dst, $shift" %}
12086 ins_encode %{
12087 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12088 %}
12089 ins_pipe(ialu_mem_imm);
12090 %}
12091
12092 // Arithmetic Shift Right by variable
// Long form, selected only when BMI2 is not supported. The int count operand is
// pinned to the rcx_RegI register class and is not passed to the assembler:
// sarq() is given only $dst (per the instruct name, the count is taken from
// CL). The flags register is declared killed.
12093 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12094 %{
12095 predicate(!VM_Version::supports_bmi2());
12096 match(Set dst (RShiftL dst shift));
12097 effect(KILL cr);
12098
12099 format %{ "sarq $dst, $shift" %}
12100 ins_encode %{
12101 __ sarq($dst$$Register);
12102 %}
12103 ins_pipe(ialu_reg_reg);
12104 %}
12105
12106 // Arithmetic Shift Right by variable
// Long read-modify-write form, selected only when BMI2 is not supported:
// matches a StoreL of ((LoadL dst) >> shift) back to the same memory operand.
// The count is pinned to rcx_RegI and sarq() is given only the address. The
// flags register is declared killed.
12107 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12108 %{
12109 predicate(!VM_Version::supports_bmi2());
12110 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12111 effect(KILL cr);
12112
12113 format %{ "sarq $dst, $shift" %}
12114 ins_encode %{
12115 __ sarq($dst$$Address);
12116 %}
12117 ins_pipe(ialu_mem_reg);
12118 %}
12119
// Long Arithmetic Shift Right by variable, BMI2 form (selected only when
// VM_Version::supports_bmi2()): three-operand sarxq with the count in any
// rRegI register. No flags effect is declared.
12120 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12121 %{
12122 predicate(VM_Version::supports_bmi2());
12123 match(Set dst (RShiftL src shift));
12124
12125 format %{ "sarxq $dst, $src, $shift" %}
12126 ins_encode %{
12127 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12128 %}
12129 ins_pipe(ialu_reg_reg);
12130 %}
12131
// Long Arithmetic Shift Right by variable, BMI2 form with the shifted value
// loaded from memory: matches (RShiftL (LoadL src) shift) and emits sarxq with
// an address source. ins_cost is 175; no flags effect is declared.
12132 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12133 %{
12134 predicate(VM_Version::supports_bmi2());
12135 match(Set dst (RShiftL (LoadL src) shift));
12136 ins_cost(175);
12137 format %{ "sarxq $dst, $src, $shift" %}
12138 ins_encode %{
12139 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12140 %}
12141 ins_pipe(ialu_reg_mem);
12142 %}
12143
12144 // Logical Shift Right by 8-bit immediate
// Long, legacy two-operand form, selected only when UseAPX is off: shifts $dst
// in place with shrq by the immediate count. The flags register is declared
// killed.
12145 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12146 %{
12147 predicate(!UseAPX);
12148 match(Set dst (URShiftL dst shift));
12149 effect(KILL cr);
12150
12151 format %{ "shrq $dst, $shift" %}
12152 ins_encode %{
12153 __ shrq($dst$$Register, $shift$$constant);
12154 %}
12155 ins_pipe(ialu_reg);
12156 %}
12157
12158 // Logical Shift Right by 8-bit immediate
// Long APX NDD form (selected only when UseAPX is set): matches
// (URShiftL src shift) and emits eshrq with separate dst and src registers.
// The flags register is declared killed; operand 1 is flagged NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
12159 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12160 %{
12161 predicate(UseAPX);
12162 match(Set dst (URShiftL src shift));
12163 effect(KILL cr);
12164 flag(PD::Flag_ndd_demotable_opr1);
12165
12166 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12167 ins_encode %{
12168 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12169 %}
12170 ins_pipe(ialu_reg);
12171 %}
12172
12173 // Logical Shift Right by 8-bit immediate
// Long read-modify-write form: matches a StoreL of ((LoadL dst) >>> shift) back
// to the same memory operand and emits one shrq on that address. It carries no
// predicate. The flags register is declared killed.
12174 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12175 %{
12176 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12177 effect(KILL cr);
12178
12179 format %{ "shrq $dst, $shift" %}
12180 ins_encode %{
12181 __ shrq($dst$$Address, $shift$$constant);
12182 %}
12183 ins_pipe(ialu_mem_imm);
12184 %}
12185
12186 // Logical Shift Right by variable
// Long form, selected only when BMI2 is not supported. The int count operand is
// pinned to the rcx_RegI register class and is not passed to the assembler:
// shrq() is given only $dst (per the instruct name, the count is taken from
// CL). The flags register is declared killed.
12187 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12188 %{
12189 predicate(!VM_Version::supports_bmi2());
12190 match(Set dst (URShiftL dst shift));
12191 effect(KILL cr);
12192
12193 format %{ "shrq $dst, $shift" %}
12194 ins_encode %{
12195 __ shrq($dst$$Register);
12196 %}
12197 ins_pipe(ialu_reg_reg);
12198 %}
12199
12200 // Logical Shift Right by variable
// Long read-modify-write form, selected only when BMI2 is not supported:
// matches a StoreL of ((LoadL dst) >>> shift) back to the same memory operand.
// The count is pinned to rcx_RegI and shrq() is given only the address. The
// flags register is declared killed.
12201 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12202 %{
12203 predicate(!VM_Version::supports_bmi2());
12204 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12205 effect(KILL cr);
12206
12207 format %{ "shrq $dst, $shift" %}
12208 ins_encode %{
12209 __ shrq($dst$$Address);
12210 %}
12211 ins_pipe(ialu_mem_reg);
12212 %}
12213
// Long Logical Shift Right by variable, BMI2 form (selected only when
// VM_Version::supports_bmi2()): three-operand shrxq with the count in any
// rRegI register. No flags effect is declared.
12214 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12215 %{
12216 predicate(VM_Version::supports_bmi2());
12217 match(Set dst (URShiftL src shift));
12218
12219 format %{ "shrxq $dst, $src, $shift" %}
12220 ins_encode %{
12221 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12222 %}
12223 ins_pipe(ialu_reg_reg);
12224 %}
12225
// Long Logical Shift Right by variable, BMI2 form with the shifted value loaded
// from memory: matches (URShiftL (LoadL src) shift) and emits shrxq with an
// address source. ins_cost is 175; no flags effect is declared.
12226 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12227 %{
12228 predicate(VM_Version::supports_bmi2());
12229 match(Set dst (URShiftL (LoadL src) shift));
12230 ins_cost(175);
12231 format %{ "shrxq $dst, $src, $shift" %}
12232 ins_encode %{
12233 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12234 %}
12235 ins_pipe(ialu_reg_mem);
12236 %}
12237
12238 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12239 // This idiom is used by the compiler for the i2b bytecode.
// The matched pair (RShiftI (LShiftI src 24) 24) is replaced by a single
// movsbl from $src to $dst. No predicate and no flags effect are declared.
12240 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12241 %{
12242 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12243
12244 format %{ "movsbl $dst, $src\t# i2b" %}
12245 ins_encode %{
12246 __ movsbl($dst$$Register, $src$$Register);
12247 %}
12248 ins_pipe(ialu_reg_reg);
12249 %}
12250
12251 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12252 // This idiom is used by the compiler for the i2s bytecode.
// The matched pair (RShiftI (LShiftI src 16) 16) is replaced by a single
// movswl from $src to $dst. No predicate and no flags effect are declared.
12253 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12254 %{
12255 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12256
12257 format %{ "movswl $dst, $src\t# i2s" %}
12258 ins_encode %{
12259 __ movswl($dst$$Register, $src$$Register);
12260 %}
12261 ins_pipe(ialu_reg_reg);
12262 %}
12263
12264 // ROL/ROR instructions
12265
12266 // Rotate left by constant.
// Selected for int rotates (node basic type T_INT) when BMI2 is not supported:
// rotates $dst in place with roll by the immediate count. The flags register
// is declared killed.
12267 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12268 %{
12269 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12270 match(Set dst (RotateLeft dst shift));
12271 effect(KILL cr);
12272 format %{ "roll $dst, $shift" %}
12273 ins_encode %{
12274 __ roll($dst$$Register, $shift$$constant);
12275 %}
12276 ins_pipe(ialu_reg);
12277 %}
12278
// Rotate left by constant, BMI2 form for int (UseAPX off, BMI2 supported).
// Encoded as a rotate right: rorxl by 32 - (shift & 31). The format string
// prints the pseudo-mnemonic "rolxl". No flags effect is declared.
// NOTE(review): when (shift & 31) == 0 the count handed to rorxl is 32 --
// confirm this is treated as a zero rotate.
12279 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12280 %{
12281 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12282 match(Set dst (RotateLeft src shift));
12283 format %{ "rolxl $dst, $src, $shift" %}
12284 ins_encode %{
12285 int shift = 32 - ($shift$$constant & 31);
12286 __ rorxl($dst$$Register, $src$$Register, shift);
12287 %}
12288 ins_pipe(ialu_reg_reg);
12289 %}
12290
// Rotate left by constant, BMI2 form for int with the value loaded from memory:
// matches (RotateLeft (LoadI src) shift) and emits rorxl on the address with a
// count of 32 - (shift & 31). The format string prints the pseudo-mnemonic
// "rolxl". ins_cost is 175; no flags effect is declared.
// NOTE(review): unlike rolI_immI8, this predicate does not test UseAPX --
// confirm that is intentional.
12291 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12292 %{
12293 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12294 match(Set dst (RotateLeft (LoadI src) shift));
12295 ins_cost(175);
12296 format %{ "rolxl $dst, $src, $shift" %}
12297 ins_encode %{
12298 int shift = 32 - ($shift$$constant & 31);
12299 __ rorxl($dst$$Register, $src$$Address, shift);
12300 %}
12301 ins_pipe(ialu_reg_mem);
12302 %}
12303
12304 // Rotate Left by variable
// Legacy in-place int form, selected only when UseAPX is off. The count operand
// is pinned to the rcx_RegI register class and roll() is given only $dst.
// The flags register is declared killed.
12305 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12306 %{
12307 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12308 match(Set dst (RotateLeft dst shift));
12309 effect(KILL cr);
12310 format %{ "roll $dst, $shift" %}
12311 ins_encode %{
12312 __ roll($dst$$Register);
12313 %}
12314 ins_pipe(ialu_reg_reg);
12315 %}
12316
12317 // Rotate Left by variable
// APX NDD int form (selected only when UseAPX is set): separate dst and src
// registers, count pinned to rcx_RegI and not passed to eroll(). The flags
// register is declared killed; operand 1 is flagged NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
12318 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12319 %{
12320 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12321 match(Set dst (RotateLeft src shift));
12322 effect(KILL cr);
12323 flag(PD::Flag_ndd_demotable_opr1);
12324
12325 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12326 ins_encode %{
12327 __ eroll($dst$$Register, $src$$Register, false);
12328 %}
12329 ins_pipe(ialu_reg_reg);
12330 %}
12331
12332 // Rotate Right by constant.
// Selected for int rotates (node basic type T_INT) when BMI2 is not supported:
// rotates $dst in place with rorl by the immediate count. The flags register
// is declared killed.
12333 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12334 %{
12335 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12336 match(Set dst (RotateRight dst shift));
12337 effect(KILL cr);
12338 format %{ "rorl $dst, $shift" %}
12339 ins_encode %{
12340 __ rorl($dst$$Register, $shift$$constant);
12341 %}
12342 ins_pipe(ialu_reg);
12343 %}
12344
12345 // Rotate Right by constant.
// BMI2 form for int (UseAPX off, BMI2 supported): three-operand rorxl with the
// immediate count passed through unchanged. No flags effect is declared.
12346 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12347 %{
12348 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12349 match(Set dst (RotateRight src shift));
12350 format %{ "rorxl $dst, $src, $shift" %}
12351 ins_encode %{
12352 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12353 %}
12354 ins_pipe(ialu_reg_reg);
12355 %}
12356
// Rotate right by constant, BMI2 form for int with the value loaded from
// memory: matches (RotateRight (LoadI src) shift) and emits rorxl on the
// address. ins_cost is 175; no flags effect is declared.
// NOTE(review): unlike rorI_immI8, this predicate does not test UseAPX --
// confirm that is intentional.
12357 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12358 %{
12359 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12360 match(Set dst (RotateRight (LoadI src) shift));
12361 ins_cost(175);
12362 format %{ "rorxl $dst, $src, $shift" %}
12363 ins_encode %{
12364 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12365 %}
12366 ins_pipe(ialu_reg_mem);
12367 %}
12368
12369 // Rotate Right by variable
// Legacy in-place int form, selected only when UseAPX is off. The count operand
// is pinned to the rcx_RegI register class and rorl() is given only $dst.
// The flags register is declared killed.
12370 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12371 %{
12372 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12373 match(Set dst (RotateRight dst shift));
12374 effect(KILL cr);
12375 format %{ "rorl $dst, $shift" %}
12376 ins_encode %{
12377 __ rorl($dst$$Register);
12378 %}
12379 ins_pipe(ialu_reg_reg);
12380 %}
12381
12382 // Rotate Right by variable
// APX NDD int form (selected only when UseAPX is set): separate dst and src
// registers, count pinned to rcx_RegI and not passed to erorl(). The flags
// register is declared killed; operand 1 is flagged NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
12383 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12384 %{
12385 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12386 match(Set dst (RotateRight src shift));
12387 effect(KILL cr);
12388 flag(PD::Flag_ndd_demotable_opr1);
12389
12390 format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
12391 ins_encode %{
12392 __ erorl($dst$$Register, $src$$Register, false);
12393 %}
12394 ins_pipe(ialu_reg_reg);
12395 %}
12396
12397 // Rotate Left by constant.
// Selected for long rotates (node basic type T_LONG) when BMI2 is not
// supported: rotates $dst in place with rolq by the immediate count. The flags
// register is declared killed.
12398 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12399 %{
12400 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12401 match(Set dst (RotateLeft dst shift));
12402 effect(KILL cr);
12403 format %{ "rolq $dst, $shift" %}
12404 ins_encode %{
12405 __ rolq($dst$$Register, $shift$$constant);
12406 %}
12407 ins_pipe(ialu_reg);
12408 %}
12409
// Rotate left by constant, BMI2 form for long (UseAPX off, BMI2 supported).
// Encoded as a rotate right: rorxq by 64 - (shift & 63). The format string
// prints the pseudo-mnemonic "rolxq". No flags effect is declared.
// NOTE(review): when (shift & 63) == 0 the count handed to rorxq is 64 --
// confirm this is treated as a zero rotate.
12410 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12411 %{
12412 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12413 match(Set dst (RotateLeft src shift));
12414 format %{ "rolxq $dst, $src, $shift" %}
12415 ins_encode %{
12416 int shift = 64 - ($shift$$constant & 63);
12417 __ rorxq($dst$$Register, $src$$Register, shift);
12418 %}
12419 ins_pipe(ialu_reg_reg);
12420 %}
12421
// Rotate left by constant, BMI2 form for long with the value loaded from
// memory: matches (RotateLeft (LoadL src) shift) and emits rorxq on the address
// with a count of 64 - (shift & 63). The format string prints the
// pseudo-mnemonic "rolxq". ins_cost is 175; no flags effect is declared.
// NOTE(review): unlike rolL_immI8, this predicate does not test UseAPX --
// confirm that is intentional.
12422 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12423 %{
12424 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12425 match(Set dst (RotateLeft (LoadL src) shift));
12426 ins_cost(175);
12427 format %{ "rolxq $dst, $src, $shift" %}
12428 ins_encode %{
12429 int shift = 64 - ($shift$$constant & 63);
12430 __ rorxq($dst$$Register, $src$$Address, shift);
12431 %}
12432 ins_pipe(ialu_reg_mem);
12433 %}
12434
12435 // Rotate Left by variable
// Legacy in-place long form, selected only when UseAPX is off. The int count
// operand is pinned to the rcx_RegI register class and rolq() is given only
// $dst. The flags register is declared killed.
12436 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12437 %{
12438 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12439 match(Set dst (RotateLeft dst shift));
12440 effect(KILL cr);
12441
12442 format %{ "rolq $dst, $shift" %}
12443 ins_encode %{
12444 __ rolq($dst$$Register);
12445 %}
12446 ins_pipe(ialu_reg_reg);
12447 %}
12448
12449 // Rotate Left by variable
// APX NDD long form (selected only when UseAPX is set): separate dst and src
// registers, count pinned to rcx_RegI and not passed to erolq(). The flags
// register is declared killed; operand 1 is flagged NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
12450 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12451 %{
12452 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12453 match(Set dst (RotateLeft src shift));
12454 effect(KILL cr);
12455 flag(PD::Flag_ndd_demotable_opr1);
12456
12457 format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
12458 ins_encode %{
12459 __ erolq($dst$$Register, $src$$Register, false);
12460 %}
12461 ins_pipe(ialu_reg_reg);
12462 %}
12463
12464 // Rotate Right by constant.
// Selected for long rotates (node basic type T_LONG) when BMI2 is not
// supported: rotates $dst in place with rorq by the immediate count. The flags
// register is declared killed.
12465 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12466 %{
12467 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12468 match(Set dst (RotateRight dst shift));
12469 effect(KILL cr);
12470 format %{ "rorq $dst, $shift" %}
12471 ins_encode %{
12472 __ rorq($dst$$Register, $shift$$constant);
12473 %}
12474 ins_pipe(ialu_reg);
12475 %}
12476
12477 // Rotate Right by constant
// BMI2 form for long: three-operand rorxq with the immediate count passed
// through unchanged. No flags effect is declared.
// NOTE(review): unlike rorI_immI8 and rolL_immI8, this predicate does not
// include !UseAPX -- confirm that is intentional.
12478 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12479 %{
12480 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12481 match(Set dst (RotateRight src shift));
12482 format %{ "rorxq $dst, $src, $shift" %}
12483 ins_encode %{
12484 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12485 %}
12486 ins_pipe(ialu_reg_reg);
12487 %}
12488
// Rotate right by constant, BMI2 form for long with the value loaded from
// memory: matches (RotateRight (LoadL src) shift) and emits rorxq on the
// address. ins_cost is 175; no flags effect is declared. The predicate tests
// only BMI2 support and the node type, not UseAPX.
12489 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12490 %{
12491 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12492 match(Set dst (RotateRight (LoadL src) shift));
12493 ins_cost(175);
12494 format %{ "rorxq $dst, $src, $shift" %}
12495 ins_encode %{
12496 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12497 %}
12498 ins_pipe(ialu_reg_mem);
12499 %}
12500
12501 // Rotate Right by variable
// Legacy in-place long form, selected only when UseAPX is off. The int count
// operand is pinned to the rcx_RegI register class and rorq() is given only
// $dst. The flags register is declared killed.
12502 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12503 %{
12504 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12505 match(Set dst (RotateRight dst shift));
12506 effect(KILL cr);
12507 format %{ "rorq $dst, $shift" %}
12508 ins_encode %{
12509 __ rorq($dst$$Register);
12510 %}
12511 ins_pipe(ialu_reg_reg);
12512 %}
12513
12514 // Rotate Right by variable
// APX NDD long form (selected only when UseAPX is set): separate dst and src
// registers, count pinned to rcx_RegI and not passed to erorq(). The flags
// register is declared killed; operand 1 is flagged NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
12515 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12516 %{
12517 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12518 match(Set dst (RotateRight src shift));
12519 effect(KILL cr);
12520 flag(PD::Flag_ndd_demotable_opr1);
12521
12522 format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
12523 ins_encode %{
12524 __ erorq($dst$$Register, $src$$Register, false);
12525 %}
12526 ins_pipe(ialu_reg_reg);
12527 %}
12528
12529 //----------------------------- CompressBits/ExpandBits ------------------------
12530
// CompressBits on long values (predicate: the node's bottom type is a long),
// with the mask in a register: emits pextq $dst, $src, $mask. No flags effect
// is declared. The predicate does not itself test for CPU feature support.
12531 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12532 predicate(n->bottom_type()->isa_long());
12533 match(Set dst (CompressBits src mask));
12534 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12535 ins_encode %{
12536 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12537 %}
12538 ins_pipe( pipe_slow );
12539 %}
12540
// ExpandBits on long values (predicate: the node's bottom type is a long),
// with the mask in a register: emits pdepq $dst, $src, $mask. No flags effect
// is declared. The predicate does not itself test for CPU feature support.
12541 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12542 predicate(n->bottom_type()->isa_long());
12543 match(Set dst (ExpandBits src mask));
12544 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12545 ins_encode %{
12546 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12547 %}
12548 ins_pipe( pipe_slow );
12549 %}
12550
// CompressBits on long values with the mask loaded from memory: matches
// (CompressBits src (LoadL mask)) and emits pextq with an address mask operand.
// No flags effect is declared.
12551 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12552 predicate(n->bottom_type()->isa_long());
12553 match(Set dst (CompressBits src (LoadL mask)));
12554 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12555 ins_encode %{
12556 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12557 %}
12558 ins_pipe( pipe_slow );
12559 %}
12560
// ExpandBits on long values with the mask loaded from memory: matches
// (ExpandBits src (LoadL mask)) and emits pdepq with an address mask operand.
// No flags effect is declared.
12561 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12562 predicate(n->bottom_type()->isa_long());
12563 match(Set dst (ExpandBits src (LoadL mask)));
12564 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12565 ins_encode %{
12566 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12567 %}
12568 ins_pipe( pipe_slow );
12569 %}
12570
12571
12572 // Logical Instructions
12573
12574 // Integer Logical Instructions
12575
12576 // And Instructions
12577 // And Register with Register
// Legacy two-operand form, selected only when UseAPX is off: andl $dst, $src
// with the result in $dst. The flags register is declared killed, and the
// flag(...) list records that the instruction sets the sign, zero and parity
// flags and clears the overflow and carry flags.
12578 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12579 %{
12580 predicate(!UseAPX);
12581 match(Set dst (AndI dst src));
12582 effect(KILL cr);
12583 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12584
12585 format %{ "andl $dst, $src\t# int" %}
12586 ins_encode %{
12587 __ andl($dst$$Register, $src$$Register);
12588 %}
12589 ins_pipe(ialu_reg_reg);
12590 %}
12591
12592 // And Register with Register using New Data Destination (NDD)
// Selected only when UseAPX is set: eandl with a destination register separate
// from both sources. The flags register is declared killed; the flag(...) list
// records the same status-flag effects as the legacy form and marks both
// operand 1 and operand 2 as NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
12593 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12594 %{
12595 predicate(UseAPX);
12596 match(Set dst (AndI src1 src2));
12597 effect(KILL cr);
12598 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12599
12600 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12601 ins_encode %{
12602 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12603
12604 %}
12605 ins_pipe(ialu_reg_reg);
12606 %}
12607
12608 // And Register with Immediate 255
// The AND with the immI_255 mask is replaced by a movzbl from $src to $dst.
// No predicate and no flags effect are declared.
12609 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12610 %{
12611 match(Set dst (AndI src mask));
12612
12613 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12614 ins_encode %{
12615 __ movzbl($dst$$Register, $src$$Register);
12616 %}
12617 ins_pipe(ialu_reg);
12618 %}
12619
12620 // And Register with Immediate 255 and promote to long
// Matches (ConvI2L (AndI src mask)) and emits the same single movzbl as the int
// form, with a long destination register. No predicate and no flags effect are
// declared.
12621 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12622 %{
12623 match(Set dst (ConvI2L (AndI src mask)));
12624
12625 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12626 ins_encode %{
12627 __ movzbl($dst$$Register, $src$$Register);
12628 %}
12629 ins_pipe(ialu_reg);
12630 %}
12631
12632 // And Register with Immediate 65535
// The AND with the immI_65535 mask is replaced by a movzwl from $src to $dst.
// No predicate and no flags effect are declared.
12633 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12634 %{
12635 match(Set dst (AndI src mask));
12636
12637 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12638 ins_encode %{
12639 __ movzwl($dst$$Register, $src$$Register);
12640 %}
12641 ins_pipe(ialu_reg);
12642 %}
12643
12644 // And Register with Immediate 65535 and promote to long
// Matches (ConvI2L (AndI src mask)) and emits the same single movzwl as the int
// form, with a long destination register. No predicate and no flags effect are
// declared.
12645 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12646 %{
12647 match(Set dst (ConvI2L (AndI src mask)));
12648
12649 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12650 ins_encode %{
12651 __ movzwl($dst$$Register, $src$$Register);
12652 %}
12653 ins_pipe(ialu_reg);
12654 %}
12655
12656 // Can skip int2long conversions after AND with small bitmask
// BMI2 only. Matches (ConvI2L (AndI src mask)) and emits two instructions:
// movl loads exact_log2(mask + 1) into the TEMP register $tmp, then bzhiq
// writes $dst from $src using $tmp as the bit index. ins_cost is 125 and the
// flags register is declared killed.
// NOTE(review): immI_Pow2M1 presumably restricts mask to values of the form
// 2^k - 1, which exact_log2(mask + 1) relies on -- operand definition is not
// in this chunk, confirm.
12657 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12658 %{
12659 predicate(VM_Version::supports_bmi2());
12660 ins_cost(125);
12661 effect(TEMP tmp, KILL cr);
12662 match(Set dst (ConvI2L (AndI src mask)));
12663 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12664 ins_encode %{
12665 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12666 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12667 %}
12668 ins_pipe(ialu_reg_reg);
12669 %}
12670
12671 // And Register with Immediate
// Legacy two-operand form, selected only when UseAPX is off: andl $dst with an
// immI constant, result in $dst. The flags register is declared killed, and the
// flag(...) list records that the instruction sets the sign, zero and parity
// flags and clears the overflow and carry flags.
12672 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12673 %{
12674 predicate(!UseAPX);
12675 match(Set dst (AndI dst src));
12676 effect(KILL cr);
12677 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12678
12679 format %{ "andl $dst, $src\t# int" %}
12680 ins_encode %{
12681 __ andl($dst$$Register, $src$$constant);
12682 %}
12683 ins_pipe(ialu_reg);
12684 %}
12685
// And Register with Immediate, APX NDD form (selected only when UseAPX is set):
// eandl with a destination register separate from the register source and an
// immI constant as second source. The flags register is declared killed; the
// flag(...) list records the status-flag effects and marks operand 1 as
// NDD-demotable.
// NOTE(review): the trailing 'false' argument is presumably the assembler's
// no_flags parameter -- confirm.
12686 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12687 %{
12688 predicate(UseAPX);
12689 match(Set dst (AndI src1 src2));
12690 effect(KILL cr);
12691 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
12692
12693 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12694 ins_encode %{
12695 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12696 %}
12697 ins_pipe(ialu_reg);
12698 %}
12699
12700 // And Register with Memory
// Matches (AndI dst (LoadI src)) and emits andl $dst with an address source,
// result in $dst. It carries no predicate. ins_cost is 150. The flags register
// is declared killed, and the flag(...) list records that the instruction sets
// the sign, zero and parity flags and clears the overflow and carry flags.
12701 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12702 %{
12703 match(Set dst (AndI dst (LoadI src)));
12704 effect(KILL cr);
12705 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12706
12707 ins_cost(150);
12708 format %{ "andl $dst, $src\t# int" %}
12709 ins_encode %{
12710 __ andl($dst$$Register, $src$$Address);
12711 %}
12712 ins_pipe(ialu_reg_mem);
12713 %}
12714
// Byte read-modify-write: StoreB(dst, LoadB(dst) & src) folded into a single
// andb on the memory operand. Kills flags.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andb $dst, $src\t# byte" %}
  ins_encode %{
    const Register rhs = $src$$Register;
    __ andb($dst$$Address, rhs);
  %}
  ins_pipe(ialu_mem_reg);
%}
12729
// Int read-modify-write: StoreI(dst, LoadI(dst) & src) folded into a single
// andl on the memory operand. Kills flags.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    const Register rhs = $src$$Register;
    __ andl($dst$$Address, rhs);
  %}
  ins_pipe(ialu_mem_reg);
%}
12743
// Int read-modify-write with a constant: StoreI(dst, LoadI(dst) & src)
// folded into a single andl on the memory operand. Kills flags.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12758
12759 // BMI1 instructions
// (src1 ^ -1) & LoadI(src2) matched to a single andnl with a memory operand.
// Guarded by BMI1 and AVX support; kills flags.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register inverted = $src1$$Register;
    __ andnl(result, inverted, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12774
// (src1 ^ -1) & src2 matched to a single register-only andnl.
// Guarded by BMI1 and AVX support; kills flags.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register inverted = $src1$$Register;
    const Register rhs      = $src2$$Register;
    __ andnl(result, inverted, rhs);
  %}
  ins_pipe(ialu_reg);
%}
12788
// (0 - src) & src matched to a single register-only blsil.
// Guarded by BMI1 and AVX support; kills flags.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ blsil(result, input);
  %}
  ins_pipe(ialu_reg);
%}
12802
// (0 - LoadI(src)) & LoadI(src) matched to a single blsil with a memory
// operand. Guarded by BMI1 and AVX support; kills flags.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ blsil(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12817
// (LoadI(src) + -1) ^ LoadI(src) matched to a single blsmskl with a memory
// operand. Guarded by BMI1 and AVX support; kills flags.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ blsmskl(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12833
// (src + -1) ^ src matched to a single register-only blsmskl.
// Guarded by BMI1 and AVX support; kills flags.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ blsmskl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
12849
// (src + -1) & src matched to a single register-only blsrl.
// Guarded by BMI1 and AVX support; kills flags.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // No memory operand here, so use the register pipeline class, in line with
  // the other register-only BMI1 patterns (e.g. blsiI_rReg_rReg).
  ins_pipe(ialu_reg);
%}
12865
// (LoadI(src) + -1) & LoadI(src) matched to a single blsrl with a memory
// operand. Guarded by BMI1 and AVX support; kills flags.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // The source is a memory operand, so use the register-memory pipeline
  // class, in line with the other memory-source BMI1 patterns
  // (e.g. blsiI_rReg_mem).
  ins_pipe(ialu_reg_mem);
%}
12882
12883 // Or Instructions
// OrI of two registers, computed in place (dst |= src).
// Selected only when UseAPX is off; kills flags.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register rhs = $src$$Register;
    __ orl(acc, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
12898
// OrI of two registers using the New Data Destination (NDD) form:
// dst = src1 | src2. Selected only when UseAPX is on; kills flags.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ eorl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12913
// OrI of a register with an int immediate, computed in place (dst |= src).
// Selected only when UseAPX is off; kills flags.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    __ orl(acc, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12928
// OrI of a register with an int immediate, NDD form: dst = src1 | src2.
// Selected only when UseAPX is on; kills flags.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ eorl(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12942
// OrI with the immediate as the first matched input, NDD form. The operands
// are swapped when emitting: the register (src2) is passed before the
// constant (src1). Selected only when UseAPX is on; kills flags.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    const Register result  = $dst$$Register;
    const Register reg_arg = $src2$$Register;
    __ eorl(result, reg_arg, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12956
// OrI of a register with an int loaded from memory, in place:
// dst |= LoadI(src). Kills flags.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    __ orl(acc, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12971
// Byte read-modify-write: StoreB(dst, LoadB(dst) | src) folded into a single
// orb on the memory operand. Kills flags.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    const Register rhs = $src$$Register;
    __ orb($dst$$Address, rhs);
  %}
  ins_pipe(ialu_mem_reg);
%}
12986
// Int read-modify-write: StoreI(dst, LoadI(dst) | src) folded into a single
// orl on the memory operand. Kills flags.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    const Register rhs = $src$$Register;
    __ orl($dst$$Address, rhs);
  %}
  ins_pipe(ialu_mem_reg);
%}
13000
// Int read-modify-write with a constant: StoreI(dst, LoadI(dst) | src)
// folded into a single orl on the memory operand. Kills flags.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13015
13016 // Xor Instructions
// XorI of two registers, computed in place (dst ^= src).
// Selected only when UseAPX is off; kills flags.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register rhs = $src$$Register;
    __ xorl(acc, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
13031
// XorI of two registers using the New Data Destination (NDD) form:
// dst = src1 ^ src2. Selected only when UseAPX is on; kills flags.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ exorl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13046
// XorI of a register with the constant -1, emitted as an in-place notl.
// Selected only when UseAPX is off. No flags operand or KILL effect is
// declared for this pattern.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl $dst" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ notl(target);
  %}
  ins_pipe(ialu_reg);
%}
13059
// XorI of a register with the constant -1, NDD form: emitted as
// enotl dst, src. Selected only when UseAPX is on. No flags operand or KILL
// effect is declared for this pattern.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ enotl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
13072
// XorI of a register with an int immediate other than -1, computed in place
// (dst ^= src). Selected only when UseAPX is off; kills flags.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  // The constant -1 is excluded here so that xorI_rReg_im1 is chosen for it
  // regardless of the relative instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    __ xorl(acc, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13088
// XorI of a register with an int immediate other than -1, NDD form:
// dst = src1 ^ src2. Selected only when UseAPX is on; kills flags.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  // The constant -1 is excluded here so that xorI_rReg_im1_ndd is chosen for
  // it regardless of the relative instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ exorl(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13103
// XorI of a register with an int loaded from memory, in place:
// dst ^= LoadI(src). Kills flags.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    __ xorl(acc, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13118
// Byte read-modify-write: StoreB(dst, LoadB(dst) ^ src) folded into a single
// xorb on the memory operand. Kills flags.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    const Register rhs = $src$$Register;
    __ xorb($dst$$Address, rhs);
  %}
  ins_pipe(ialu_mem_reg);
%}
13133
// Int read-modify-write: StoreI(dst, LoadI(dst) ^ src) folded into a single
// xorl on the memory operand. Kills flags.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    const Register rhs = $src$$Register;
    __ xorl($dst$$Address, rhs);
  %}
  ins_pipe(ialu_mem_reg);
%}
13147
// Int read-modify-write with a constant: StoreI(dst, LoadI(dst) ^ src)
// folded into a single xorl on the memory operand. Kills flags.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13162
13163
13164 // Long Logical Instructions
13165
13166 // And Instructions
// AndL of two registers, computed in place (dst &= src).
// Selected only when UseAPX is off; kills flags.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register rhs = $src$$Register;
    __ andq(acc, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
13181
// AndL of two registers using the New Data Destination (NDD) form:
// dst = src1 & src2. Selected only when UseAPX is on; kills flags.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ eandq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13197
// AndL of a register with the constant 255 (immL_255), emitted as movzbl.
// No flags operand or KILL effect is declared for this pattern.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // The 32-bit form is sufficient: movzbl zeroes out the upper 32 bits,
    // so no REX.W is needed.
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ movzbl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
13210
// AndL of a register with the constant 65535 (immL_65535), emitted as movzwl.
// No flags operand or KILL effect is declared for this pattern.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // The 32-bit form is sufficient: movzwl zeroes out the upper 32 bits,
    // so no REX.W is needed.
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ movzwl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
13223
// AndL of a register with an immL32 constant, computed in place (dst &= src).
// Selected only when UseAPX is off; kills flags.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    __ andq(acc, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13238
// AndL of a register with an immL32 constant, NDD form: dst = src1 & src2.
// Selected only when UseAPX is on; kills flags.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ eandq(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13252
// AndL of a register with a long loaded from memory, in place:
// dst &= LoadL(src). Kills flags.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    __ andq(acc, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13267
// Long read-modify-write: StoreL(dst, LoadL(dst) & src) folded into a single
// andq on the memory operand. Kills flags.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    const Register rhs = $src$$Register;
    __ andq($dst$$Address, rhs);
  %}
  ins_pipe(ialu_mem_reg);
%}
13282
// Long read-modify-write with an immL32 constant:
// StoreL(dst, LoadL(dst) & src) folded into a single andq on the memory
// operand. Kills flags.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13297
// Long read-modify-write AND with an immL_NotPow2 constant, emitted as btrq on
// the memory operand with bit index log2(~con). Kills flags.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  // Only take constants whose complement is a power of 2 with log2 above 30,
  // i.e. genuinely 64-bit immediates; AND/OR works well enough for 8/32-bit
  // values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    // The complement of the constant is the single bit to be reset.
    const julong bit_to_clear = (julong)~$con$$constant;
    __ btrq($dst$$Address, log2i_exact(bit_to_clear));
  %}
  ins_pipe(ialu_mem_imm);
%}
13314
13315 // BMI1 instructions
// (src1 ^ -1) & LoadL(src2) matched to a single andnq with a memory operand.
// Guarded by BMI1 and AVX support; kills flags.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andnq $dst, $src1, $src2" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register inverted = $src1$$Register;
    __ andnq(result, inverted, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13330
// (src1 ^ -1) & src2 matched to a single register-only andnq.
// Guarded by BMI1 and AVX support; kills flags.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // No memory operand here, so use the register pipeline class, in line with
  // the int counterpart andnI_rReg_rReg_rReg.
  ins_pipe(ialu_reg);
%}
13344
// (0 - src) & src matched to a single register-only blsiq.
// Guarded by BMI1 and AVX support; kills flags.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ blsiq(result, input);
  %}
  ins_pipe(ialu_reg);
%}
13358
// (0 - LoadL(src)) & LoadL(src) matched to a single blsiq with a memory
// operand. Guarded by BMI1 and AVX support; kills flags.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ blsiq(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13373
// (LoadL(src) + -1) ^ LoadL(src) matched to a single blsmskq with a memory
// operand. Guarded by BMI1 and AVX support; kills flags.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ blsmskq(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13389
// (src + -1) ^ src matched to a single register-only blsmskq.
// Guarded by BMI1 and AVX support; kills flags.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ blsmskq(result, input);
  %}
  ins_pipe(ialu_reg);
%}
13405
// (src + -1) & src matched to a single register-only blsrq.
// Guarded by BMI1 and AVX support; kills flags.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ blsrq(result, input);
  %}
  ins_pipe(ialu_reg);
%}
13421
// (LoadL(src) + -1) & LoadL(src) matched to a single blsrq with a memory
// operand. Guarded by BMI1 and AVX support; kills flags.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // The source is a memory operand, so use the register-memory pipeline
  // class, in line with the other memory-source BMI1 patterns
  // (e.g. blsiL_rReg_mem).
  ins_pipe(ialu_reg_mem);
%}
13438
13439 // Or Instructions
// OrL of two registers, computed in place (dst |= src).
// Selected only when UseAPX is off; kills flags.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register rhs = $src$$Register;
    __ orq(acc, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
13454
// OrL of two registers using the New Data Destination (NDD) form:
// dst = src1 | src2. Selected only when UseAPX is on; kills flags.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ eorq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13470
// OrL of a long register with a pointer reinterpreted via CastP2X, in place.
// The pointer operand is any_RegP so that R15 (the TLS register) can be
// matched without spilling. Selected only when UseAPX is off; kills flags.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc     = $dst$$Register;
    const Register ptr_reg = $src$$Register;
    __ orq(acc, ptr_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13484
// OrL of src1 with a pointer reinterpreted via CastP2X, NDD form:
// dst = src1 | CastP2X(src2). Selected only when UseAPX is on; kills flags.
// NOTE(review): src1 is declared any_RegP although it is the direct (long)
// OrL input in the match rule; confirm whether rRegL was intended.
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 (CastP2X src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result  = $dst$$Register;
    const Register lhs     = $src1$$Register;
    const Register ptr_reg = $src2$$Register;
    __ eorq(result, lhs, ptr_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13497
// OrL of a register with an immL32 constant, computed in place (dst |= src).
// Selected only when UseAPX is off; kills flags.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    __ orq(acc, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13512
// OrL of a register with an immL32 constant, NDD form: dst = src1 | src2.
// Selected only when UseAPX is on; kills flags.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ eorq(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13526
// OrL with the immL32 constant as the first matched input, NDD form. The
// operands are swapped when emitting: the register (src2) is passed before
// the constant (src1). Selected only when UseAPX is on; kills flags.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    const Register result  = $dst$$Register;
    const Register reg_arg = $src2$$Register;
    __ eorq(result, reg_arg, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13540
// OrL of a register with a long loaded from memory, in place:
// dst |= LoadL(src). Kills flags.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc = $dst$$Register;
    __ orq(acc, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13555
// Long read-modify-write: StoreL(dst, LoadL(dst) | src) folded into a single
// orq on the memory operand. Kills flags.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    const Register rhs = $src$$Register;
    __ orq($dst$$Address, rhs);
  %}
  ins_pipe(ialu_mem_reg);
%}
13570
// Long read-modify-write with an immL32 constant:
// StoreL(dst, LoadL(dst) | src) folded into a single orq on the memory
// operand. Kills flags.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13585
// Long read-modify-write OR with an immL_Pow2 constant, emitted as btsq on the
// memory operand with bit index log2(con). Kills flags.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  // Only take power-of-2 constants with log2 above 31, i.e. genuinely 64-bit
  // immediates; AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btsq $dst, log2($con)\t# long" %}
  ins_encode %{
    // The constant is the single bit to be set.
    const julong bit_to_set = (julong)$con$$constant;
    __ btsq($dst$$Address, log2i_exact(bit_to_set));
  %}
  ins_pipe(ialu_mem_imm);
%}
13602
13603 // Xor Instructions
// Xor Register with Register (two-operand form, selected when APX is not in use).
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13618
// Xor Register with Register using New Data Destination (NDD):
// three-operand form, selected when APX is in use. Both sources are
// flagged as NDD-demotable operands.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ exorq(Rdst, Rsrc1, Rsrc2, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13633
// Xor Register with Immediate -1: emitted as a bitwise NOT.
// No flags operand is declared for this form.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  predicate(!UseAPX);
  match(Set dst (XorL dst imm));

  format %{ "notq $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ notq(Rdst);
  %}
  ins_pipe(ialu_reg);
%}
13646
// Xor Register with Immediate -1, NDD form: $dst = ~$src, selected when APX is in use.
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm) %{
  predicate(UseAPX);
  match(Set dst (XorL src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ enotq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13659
// Xor Register with Immediate (non-APX form).
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  // The constant -1 is excluded here so that the choice of xorL_rReg_im1
  // for that case does not depend on instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13675
// Xor Register with Immediate using New Data Destination (APX form).
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  // The constant -1 is excluded here so that the choice of xorL_rReg_im1_ndd
  // for that case does not depend on instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    __ exorq(Rdst, Rsrc1, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13690
// Xor Register with Memory: XORs the long loaded from $src into $dst.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13705
// Xor Memory with Register: read-modify-write XOR of the long at $dst with $src.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13720
// Xor Memory with Immediate: read-modify-write XOR of the long at $dst
// with a 32-bit immediate operand (immL32).
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ xorq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13735
// CmpLTMask: compare $p with $q, materialize the "less" condition in $dst
// via setcc, then negate it. Kills the flags register.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Each emitted instruction gets its own line in the printed assembly; the
  // setcc line previously lacked its "\n\t" and ran into "negl $dst".
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13752
// CmpLTMask against zero: an arithmetic right shift of $dst by 31.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ sarl(Rdst, 31);
  %}
  ins_pipe(ialu_reg);
%}
13765
/* Better to save a register than avoid a branch */
// Matches p = ((p < q) ? y : 0) + (p - q): subtract, then add $y only when
// the subtraction result is "less".
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);

  ins_cost(300);
  format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
            "jge done\n\t"
            "addl $p,$y\n"
            "done: " %}
  ins_encode %{
    Label L_skip_add;
    __ subl($p$$Register, $q$$Register);
    // Short branch over the conditional add.
    __ jccb(Assembler::greaterEqual, L_skip_add);
    __ addl($p$$Register, $y$$Register);
    __ bind(L_skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
13788
/* Better to save a register than avoid a branch */
// Matches y = (p < q) ? y : 0: $y is kept when p < q and cleared otherwise.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);
  format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jlt done\n\t"
            "xorl $y, $y\n"
            "done: " %}
  ins_encode %{
    Label L_keep_y;
    __ cmpl($p$$Register, $q$$Register);
    // Short branch over the clearing xor.
    __ jccb(Assembler::less, L_keep_y);
    __ xorl($y$$Register, $y$$Register);
    __ bind(L_keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
13813
13814
13815 //---------- FP Instructions------------------------------------------------
13816
// Really expensive, avoid
// Float compare into rFlagsRegU: ucomiss followed by the flag fixup
// sequence emitted by emit_cmpfp_fixup (see the format text).
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
13835
// Float compare (register, register) into rFlagsRegUCF: a bare ucomiss.
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13846
// Float compare (register, register) into rFlagsRegUCFE using evucomxss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evucomxss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13857
// Float compare (register, memory) into rFlagsRegUCF; the load is folded
// into the ucomiss memory operand.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13868
// Float compare (register, memory) into rFlagsRegUCFE; the load is folded
// into the evucomxss memory operand.
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13879
// Float compare (register, constant) into rFlagsRegUCF; the constant is
// read from the constant table.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13890
// Float compare (register, constant) into rFlagsRegUCFE; the constant is
// read from the constant table.
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13901
// Really expensive, avoid
// Double compare into rFlagsRegU: ucomisd followed by the flag fixup
// sequence emitted by emit_cmpfp_fixup (see the format text).
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
13920
// Double compare (register, register) into rFlagsRegUCF: a bare ucomisd.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The stray " test" suffix was dropped so the printed text matches the
  // single instruction emitted, as in the float variant cmpF_cc_regCF.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
13931
// Double compare (register, register) into rFlagsRegUCFE using evucomxsd.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The stray " test" suffix was dropped so the printed text matches the
  // single instruction emitted, as in the float variant cmpF_cc_regCFE.
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
13942
// Double compare (register, memory) into rFlagsRegUCF; the load is folded
// into the ucomisd memory operand.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13953
// Double compare (register, memory) into rFlagsRegUCFE; the load is folded
// into the evucomxsd memory operand.
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxsd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13964
// Double compare (register, constant) into rFlagsRegUCF; the constant is
// read from the constant table.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13974
// Double compare (register, constant) into rFlagsRegUCFE; the constant is
// read from the constant table.
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxsd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13985
// Compare into -1,0,1
// Three-way float compare (register, register): ucomiss, then the
// result sequence emitted by emit_cmpfp3 into $dst.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14006
// Compare into -1,0,1
// Three-way float compare (register, memory): ucomiss with a memory
// operand, then the result sequence emitted by emit_cmpfp3 into $dst.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14027
// Compare into -1,0,1
// Three-way float compare (register, constant-table constant), then the
// result sequence emitted by emit_cmpfp3 into $dst.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14047
// Compare into -1,0,1
// Three-way double compare (register, register): ucomisd, then the
// result sequence emitted by emit_cmpfp3 into $dst.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14068
// Compare into -1,0,1
// Three-way double compare (register, memory): ucomisd with a memory
// operand, then the result sequence emitted by emit_cmpfp3 into $dst.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14089
// Compare into -1,0,1
// Three-way double compare (register, constant-table constant), then the
// result sequence emitted by emit_cmpfp3 into $dst.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14109
14110 //----------Arithmetic Conversion Instructions---------------------------------
14111
// Convert float to double, register to register.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister to   = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    __ cvtss2sd(to, from);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14122
// Convert float to double with the load folded in; only matched when
// UseAVX == 0.
instruct convF2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtss2sd(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14134
// Convert double to float, register to register.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister to   = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    __ cvtsd2ss(to, from);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14145
// Convert double to float with the load folded in; only matched when
// UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtsd2ss(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14157
// XXX do mem variants
// Convert float to int through the convertF2I macro-assembler helper;
// used when AVX10.2 is not supported. Kills the flags register.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);

  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14170
// Convert float to int with a single evcvttss2sisl when AVX10.2 is
// supported; no flags operand is declared.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evcvttss2sisl(Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14181
// Convert a float loaded from memory to int with evcvttss2sisl
// (AVX10.2 only); the load is folded into the instruction.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ evcvttss2sisl(Rdst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14192
// Convert float to long through the convertF2I macro-assembler helper;
// used when AVX10.2 is not supported. Kills the flags register.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14204
// Convert float to long with a single evcvttss2sisq when AVX10.2 is
// supported; no flags operand is declared.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evcvttss2sisq(Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14215
// Convert a float loaded from memory to long with evcvttss2sisq
// (AVX10.2 only); the load is folded into the instruction.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ evcvttss2sisq(Rdst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14226
// Convert double to int through the convertF2I macro-assembler helper;
// used when AVX10.2 is not supported. Kills the flags register.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14238
// Convert double to int with a single evcvttsd2sisl when AVX10.2 is
// supported; no flags operand is declared.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evcvttsd2sisl(Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14249
// Convert a double loaded from memory to int with evcvttsd2sisl
// (AVX10.2 only); the load is folded into the instruction.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ evcvttsd2sisl(Rdst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14260
// Convert double to long through the convertF2I macro-assembler helper;
// used when AVX10.2 is not supported. Kills the flags register.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14272
// Convert double to long with a single evcvttsd2sisq when AVX10.2 is
// supported; no flags operand is declared.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ evcvttsd2sisq(Rdst, Xsrc);
  %}
  ins_pipe(pipe_slow);
%}
14283
// Convert a double loaded from memory to long with evcvttsd2sisq
// (AVX10.2 only); the load is folded into the instruction.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ evcvttsd2sisq(Rdst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14294
// RoundD: double to long via the round_double macro-assembler helper.
// $dst, $rtmp and $rcx are all TEMPs; the flags register is killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ round_double(Rdst, Xsrc, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14305
// RoundF: float to int via the round_float macro-assembler helper.
// $dst, $rtmp and $rcx are all TEMPs; the flags register is killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ round_float(Rdst, Xsrc, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14316
// Convert int to float from a general register; selected when UseXmmI2F is off.
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed first.
    // NOTE(review): presumably to break the dependency on the old $dst value - confirm.
    if (UseAVX > 0) {
      __ pxor(Xdst, Xdst);
    }
    __ cvtsi2ssl(Xdst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14331
// Convert an int loaded from memory to float; only matched when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2ssl(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14343
// Convert int to double from a general register; selected when UseXmmI2D is off.
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed first.
    // NOTE(review): presumably to break the dependency on the old $dst value - confirm.
    if (UseAVX > 0) {
      __ pxor(Xdst, Xdst);
    }
    __ cvtsi2sdl(Xdst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14358
// Convert an int loaded from memory to double; only matched when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2sdl(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14370
// Convert int to float via the XMM unit (selected when UseXmmI2F is on):
// move the int into $dst, then convert in place.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movdl(Xdst, $src$$Register);
    __ cvtdq2ps(Xdst, Xdst);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14384
// Convert int to double via the XMM unit (selected when UseXmmI2D is on):
// move the int into $dst, then convert in place.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movdl(Xdst, $src$$Register);
    __ cvtdq2pd(Xdst, Xdst);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14398
// Convert long to float from a general register.
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed first.
    // NOTE(review): presumably to break the dependency on the old $dst value - confirm.
    if (UseAVX > 0) {
      __ pxor(Xdst, Xdst);
    }
    __ cvtsi2ssq(Xdst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14412
// Convert a long loaded from memory to float; only matched when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2ssq(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14424
// Convert long to double from a general register.
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed first.
    // NOTE(review): presumably to break the dependency on the old $dst value - confirm.
    if (UseAVX > 0) {
      __ pxor(Xdst, Xdst);
    }
    __ cvtsi2sdq(Xdst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14438
// Convert a long loaded from memory to double; only matched when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ cvtsi2sdq(Xdst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14450
// Sign-extend convert int to long (movslq).
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq $dst, $src\t# i2l" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movslq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
14462
// Zero-extend convert int to long: (ConvI2L src) masked with immL_32bits.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    // Nothing is emitted when source and destination are the same register.
    const bool same_reg = ($dst$$reg == $src$$reg);
    if (!same_reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
14476
// Zero-extend convert int to long, with the int load folded into the movl.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14488
// Zero-extend the low 32 bits of a long: AndL with immL_32bits done as a movl.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
14499
// Convert long to int: a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl $dst, $src\t# l2i" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
14510
14511
// MoveF2I from a float stack slot (addressed off rsp) into an int register.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
14523
// MoveI2F from an int stack slot (addressed off rsp) into a float register.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14535
// MoveD2L from a double stack slot (addressed off rsp) into a long register.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
14547
// MoveL2D from a long stack slot (addressed off rsp) into a double register;
// variant selected when UseXmmLoadAndClearUpper is off.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14560
// MoveL2D from a long stack slot (addressed off rsp) into a double register;
// variant selected when UseXmmLoadAndClearUpper is on.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14573
14574
// MoveF2I from a float register into an int stack slot (addressed off rsp).
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14586
// MoveI2F from an int register into a float stack slot (addressed off rsp).
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
14598
// MoveD2L from a double register into a long stack slot (addressed off rsp).
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The printed tag names this instruct; it previously said MoveL2D_reg_stack,
  // which is the name of the opposite-direction instruct.
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14610
// MoveL2D from a long register into a double stack slot (addressed off rsp).
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14622
// MoveF2I directly from a float register to an int register.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ movdl(Rdst, Xsrc);
  %}
  ins_pipe( pipe_slow );
%}
14633
// MoveD2L directly from a double register to a long register.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    Rdst = $dst$$Register;
    XMMRegister Xsrc = $src$$XMMRegister;
    __ movdq(Rdst, Xsrc);
  %}
  ins_pipe( pipe_slow );
%}
14644
// MoveI2F directly from an int register to a float register.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ movdl(Xdst, Rsrc);
  %}
  ins_pipe( pipe_slow );
%}
14655
// MoveL2D directly from a long register to a double register.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    Register    Rsrc = $src$$Register;
    __ movdq(Xdst, Rsrc);
  %}
  ins_pipe( pipe_slow );
%}
14666
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets (UseAVX <= 2).
// The count (rcx) and base (rdi) are consumed; rax and the flags are killed
// and $tmp is a scratch XMM register. The code itself is produced by the
// clear_mem macro-assembler helper; the format template below describes the
// variants it may emit.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register Rbase = $base$$Register;
    Register Rcnt  = $cnt$$Register;
    Register Rzero = $zero$$Register;
    // No opmask register is used on this non-AVX512 path (knoreg).
    __ clear_mem(Rbase, Rcnt, Rzero, $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
14727
14728 // Small non-constant length ClearArray for AVX512 targets.
// Matches (ClearArray cnt base) when the node is not flagged is_large() and
// UseAVX > 2. ins_cost is 125.
// Operands: cnt is pinned to rcx, base to rdi, zero to rax; tmp is a legRegD
// temporary and ktmp a kReg temporary. cnt and base are consumed and
// clobbered (USE_KILL), zero and the flags are killed.
// The format template only builds the textual listing (same text as the
// non-EVEX variant); the code is produced by clear_mem(), called with 'false'
// as its fifth argument and ktmp as the mask register.
14729 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
14730 Universe dummy, rFlagsReg cr)
14731 %{
14732 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
14733 match(Set dummy (ClearArray cnt base));
14734 ins_cost(125);
14735 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
14736
14737 format %{ $$template
14738 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14739 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14740 $$emit$$"jg LARGE\n\t"
14741 $$emit$$"dec rcx\n\t"
14742 $$emit$$"js DONE\t# Zero length\n\t"
14743 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
14744 $$emit$$"dec rcx\n\t"
14745 $$emit$$"jge LOOP\n\t"
14746 $$emit$$"jmp DONE\n\t"
14747 $$emit$$"# LARGE:\n\t"
14748 if (UseFastStosb) {
14749 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14750 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
14751 } else if (UseXMMForObjInit) {
14752 $$emit$$"mov rdi,rax\n\t"
14753 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14754 $$emit$$"jmpq L_zero_64_bytes\n\t"
14755 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14756 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14757 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14758 $$emit$$"add 0x40,rax\n\t"
14759 $$emit$$"# L_zero_64_bytes:\n\t"
14760 $$emit$$"sub 0x8,rcx\n\t"
14761 $$emit$$"jge L_loop\n\t"
14762 $$emit$$"add 0x4,rcx\n\t"
14763 $$emit$$"jl L_tail\n\t"
14764 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14765 $$emit$$"add 0x20,rax\n\t"
14766 $$emit$$"sub 0x4,rcx\n\t"
14767 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14768 $$emit$$"add 0x4,rcx\n\t"
14769 $$emit$$"jle L_end\n\t"
14770 $$emit$$"dec rcx\n\t"
14771 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14772 $$emit$$"vmovq xmm0,(rax)\n\t"
14773 $$emit$$"add 0x8,rax\n\t"
14774 $$emit$$"dec rcx\n\t"
14775 $$emit$$"jge L_sloop\n\t"
14776 $$emit$$"# L_end:\n\t"
14777 } else {
14778 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
14779 }
14780 $$emit$$"# DONE"
14781 %}
14782 ins_encode %{
14783 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14784 $tmp$$XMMRegister, false, $ktmp$$KRegister);
14785 %}
14786 ins_pipe(pipe_slow);
14787 %}
14788
14789 // Large non-constant length ClearArray for non-AVX512 targets.
// Matches (ClearArray cnt base) when the node is flagged is_large() and
// UseAVX <= 2.
// Operands and effects are the same as in rep_stos: cnt in rcx, base in rdi,
// zero in rax, XMM temporary tmp; cnt/base USE_KILL, zero and flags killed.
// The format listing has no short-array prologue; it shows one of three
// sequences selected by UseFastStosb / UseXMMForObjInit. The code is produced
// by clear_mem(), called with 'true' as its fifth argument and knoreg as the
// mask register.
14790 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14791 Universe dummy, rFlagsReg cr)
14792 %{
14793 predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
14794 match(Set dummy (ClearArray cnt base));
14795 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14796
14797 format %{ $$template
14798 if (UseFastStosb) {
14799 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14800 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14801 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
14802 } else if (UseXMMForObjInit) {
14803 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
14804 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14805 $$emit$$"jmpq L_zero_64_bytes\n\t"
14806 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14807 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14808 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14809 $$emit$$"add 0x40,rax\n\t"
14810 $$emit$$"# L_zero_64_bytes:\n\t"
14811 $$emit$$"sub 0x8,rcx\n\t"
14812 $$emit$$"jge L_loop\n\t"
14813 $$emit$$"add 0x4,rcx\n\t"
14814 $$emit$$"jl L_tail\n\t"
14815 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14816 $$emit$$"add 0x20,rax\n\t"
14817 $$emit$$"sub 0x4,rcx\n\t"
14818 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14819 $$emit$$"add 0x4,rcx\n\t"
14820 $$emit$$"jle L_end\n\t"
14821 $$emit$$"dec rcx\n\t"
14822 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14823 $$emit$$"vmovq xmm0,(rax)\n\t"
14824 $$emit$$"add 0x8,rax\n\t"
14825 $$emit$$"dec rcx\n\t"
14826 $$emit$$"jge L_sloop\n\t"
14827 $$emit$$"# L_end:\n\t"
14828 } else {
14829 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14830 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
14831 }
14832 %}
14833 ins_encode %{
14834 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14835 $tmp$$XMMRegister, true, knoreg);
14836 %}
14837 ins_pipe(pipe_slow);
14838 %}
14839
14840 // Large non-constant length ClearArray for AVX512 targets.
// Matches (ClearArray cnt base) when the node is flagged is_large() and
// UseAVX > 2.
// Operands: cnt in rcx, base in rdi, zero in rax, legRegD temporary tmp and
// kReg temporary ktmp; cnt/base USE_KILL, zero and flags killed.
// The format listing shows one of three sequences selected by UseFastStosb /
// UseXMMForObjInit. The code is produced by clear_mem(), called with 'true'
// as its fifth argument and ktmp as the mask register.
14841 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
14842 Universe dummy, rFlagsReg cr)
14843 %{
14844 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
14845 match(Set dummy (ClearArray cnt base));
14846 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
14847
14848 format %{ $$template
14849 if (UseFastStosb) {
14850 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14851 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
14852 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
14853 } else if (UseXMMForObjInit) {
14854 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
14855 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
14856 $$emit$$"jmpq L_zero_64_bytes\n\t"
14857 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14858 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14859 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14860 $$emit$$"add 0x40,rax\n\t"
14861 $$emit$$"# L_zero_64_bytes:\n\t"
14862 $$emit$$"sub 0x8,rcx\n\t"
14863 $$emit$$"jge L_loop\n\t"
14864 $$emit$$"add 0x4,rcx\n\t"
14865 $$emit$$"jl L_tail\n\t"
14866 $$emit$$"vmovdqu ymm0,(rax)\n\t"
14867 $$emit$$"add 0x20,rax\n\t"
14868 $$emit$$"sub 0x4,rcx\n\t"
14869 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14870 $$emit$$"add 0x4,rcx\n\t"
14871 $$emit$$"jle L_end\n\t"
14872 $$emit$$"dec rcx\n\t"
14873 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14874 $$emit$$"vmovq xmm0,(rax)\n\t"
14875 $$emit$$"add 0x8,rax\n\t"
14876 $$emit$$"dec rcx\n\t"
14877 $$emit$$"jge L_sloop\n\t"
14878 $$emit$$"# L_end:\n\t"
14879 } else {
14880 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14881 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
14882 }
14883 %}
14884 ins_encode %{
14885 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14886 $tmp$$XMMRegister, true, $ktmp$$KRegister);
14887 %}
14888 ins_pipe(pipe_slow);
14889 %}
14890
14891 // Small constant length ClearArray for AVX512 targets.
// Matches (ClearArray cnt base) where cnt is an immL constant, the node is not
// flagged is_large(), MaxVectorSize >= 32 and the CPU reports AVX512VL.
// ins_cost is 100. base is any rRegP; tmp (XMM), zero (int register) and ktmp
// (mask register) are temporaries, and the flags are killed.
// The encoding calls the clear_mem() form that takes the count as a constant.
14892 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
14893 %{
14894 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
14895 match(Set dummy (ClearArray cnt base));
14896 ins_cost(100);
14897 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
14898 format %{ "clear_mem_imm $base , $cnt \n\t" %}
14899 ins_encode %{
14900 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
14901 %}
14902 ins_pipe(pipe_slow);
14903 %}
14904
// StrComp with encoding LL, selected when AVX512VL+BW is not reported.
// Operands are pinned: str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx, result=rax.
// All four inputs are consumed and clobbered (USE_KILL); tmp1 is an XMM
// temporary and the flags are killed.
// Delegates to string_compare() with knoreg as the mask-register argument.
14905 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14906 rax_RegI result, legRegD tmp1, rFlagsReg cr)
14907 %{
14908 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
14909 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14910 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14911
14912 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
14913 ins_encode %{
14914 __ string_compare($str1$$Register, $str2$$Register,
14915 $cnt1$$Register, $cnt2$$Register, $result$$Register,
14916 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
14917 %}
14918 ins_pipe( pipe_slow );
14919 %}
14920
// StrComp with encoding LL, selected when AVX512VL+BW is reported.
// Same pinned operands as string_compareL (str1=rdi, cnt1=rcx, str2=rsi,
// cnt2=rdx, result=rax) plus a kReg temporary ktmp, which is passed to
// string_compare() as the mask-register argument.
14921 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14922 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
14923 %{
14924 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
14925 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14926 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14927
14928 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
14929 ins_encode %{
14930 __ string_compare($str1$$Register, $str2$$Register,
14931 $cnt1$$Register, $cnt2$$Register, $result$$Register,
14932 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
14933 %}
14934 ins_pipe( pipe_slow );
14935 %}
14936
// StrComp with encoding UU, selected when AVX512VL+BW is not reported.
// Operands are pinned: str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx, result=rax.
// All four inputs are USE_KILL; tmp1 is an XMM temporary; flags are killed.
// Delegates to string_compare() with knoreg as the mask-register argument.
14937 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14938 rax_RegI result, legRegD tmp1, rFlagsReg cr)
14939 %{
14940 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
14941 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14942 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14943
14944 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
14945 ins_encode %{
14946 __ string_compare($str1$$Register, $str2$$Register,
14947 $cnt1$$Register, $cnt2$$Register, $result$$Register,
14948 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
14949 %}
14950 ins_pipe( pipe_slow );
14951 %}
14952
// StrComp with encoding UU, selected when AVX512VL+BW is reported.
// Same pinned operands as string_compareU plus a kReg temporary ktmp, which
// is passed to string_compare() as the mask-register argument.
14953 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14954 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
14955 %{
14956 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
14957 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14958 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14959
14960 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
14961 ins_encode %{
14962 __ string_compare($str1$$Register, $str2$$Register,
14963 $cnt1$$Register, $cnt2$$Register, $result$$Register,
14964 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
14965 %}
14966 ins_pipe( pipe_slow );
14967 %}
14968
// StrComp with encoding LU, selected when AVX512VL+BW is not reported.
// Operands are pinned: str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx, result=rax.
// All four inputs are USE_KILL; tmp1 is an XMM temporary; flags are killed.
// Delegates to string_compare() with knoreg as the mask-register argument.
14969 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14970 rax_RegI result, legRegD tmp1, rFlagsReg cr)
14971 %{
14972 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
14973 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14974 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14975
14976 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
14977 ins_encode %{
14978 __ string_compare($str1$$Register, $str2$$Register,
14979 $cnt1$$Register, $cnt2$$Register, $result$$Register,
14980 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
14981 %}
14982 ins_pipe( pipe_slow );
14983 %}
14984
// StrComp with encoding LU, selected when AVX512VL+BW is reported.
// Same pinned operands as string_compareLU plus a kReg temporary ktmp, which
// is passed to string_compare() as the mask-register argument.
14985 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14986 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
14987 %{
14988 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
14989 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14990 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14991
14992 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
14993 ins_encode %{
14994 __ string_compare($str1$$Register, $str2$$Register,
14995 $cnt1$$Register, $cnt2$$Register, $result$$Register,
14996 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
14997 %}
14998 ins_pipe( pipe_slow );
14999 %}
15000
// StrComp with encoding UL, selected when AVX512VL+BW is not reported.
// Unlike the LL/UU/LU variants, the register pinning is swapped here:
// str1=rsi, cnt1=rdx, str2=rdi, cnt2=rcx (result stays in rax).
// All four inputs are USE_KILL; tmp1 is an XMM temporary; flags are killed.
15001 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15002 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15003 %{
15004 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15005 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15006 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15007
15008 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15009 ins_encode %{
// str2/cnt2 are passed ahead of str1/cnt1, so string_compare() still receives
// rdi, rsi, rcx, rdx in that order, as in the other variants.
15010 __ string_compare($str2$$Register, $str1$$Register,
15011 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15012 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15013 %}
15014 ins_pipe( pipe_slow );
15015 %}
15016
// StrComp with encoding UL, selected when AVX512VL+BW is reported.
// Register pinning is swapped relative to the LL/UU/LU variants:
// str1=rsi, cnt1=rdx, str2=rdi, cnt2=rcx (result stays in rax).
// ktmp is a kReg temporary passed as the mask-register argument.
15017 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15018 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15019 %{
15020 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15021 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15022 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15023
15024 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15025 ins_encode %{
// str2/cnt2 are passed ahead of str1/cnt1, so string_compare() still receives
// rdi, rsi, rcx, rdx in that order, as in the other variants.
15026 __ string_compare($str2$$Register, $str1$$Register,
15027 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15028 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15029 %}
15030 ins_pipe( pipe_slow );
15031 %}
15032
15033 // fast search of substring with known size.
// StrIndexOf with encoding LL where the substring length is an immI constant;
// requires UseSSE42Intrinsics.
// Operands are pinned: str1=rdi, cnt1=rdx, str2=rsi, result=rbx. cnt2 (rax)
// and tmp (rcx) are not matched inputs; they are only killed. str1, str2 and
// cnt1 are USE_KILL, tmp_vec is an XMM temporary, flags are killed.
// The constant length picks the helper: string_indexofC8() for >= 16 elements,
// string_indexof() otherwise.
15034 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15035 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15036 %{
15037 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15038 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15039 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15040
15041 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15042 ins_encode %{
15043 int icnt2 = (int)$int_cnt2$$constant;
15044 if (icnt2 >= 16) {
15045 // IndexOf for constant substrings with size >= 16 elements
15046 // which don't need to be loaded through stack.
15047 __ string_indexofC8($str1$$Register, $str2$$Register,
15048 $cnt1$$Register, $cnt2$$Register,
15049 icnt2, $result$$Register,
15050 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15051 } else {
15052 // Small strings are loaded through stack if they cross page boundary.
15053 __ string_indexof($str1$$Register, $str2$$Register,
15054 $cnt1$$Register, $cnt2$$Register,
15055 icnt2, $result$$Register,
15056 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15057 }
15058 %}
15059 ins_pipe( pipe_slow );
15060 %}
15061
15062 // fast search of substring with known size.
// StrIndexOf with encoding UU where the substring length is an immI constant;
// requires UseSSE42Intrinsics.
// Operands are pinned: str1=rdi, cnt1=rdx, str2=rsi, result=rbx. cnt2 (rax)
// and tmp (rcx) are not matched inputs; they are only killed.
// The constant length picks the helper: string_indexofC8() for >= 8 elements,
// string_indexof() otherwise.
15063 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15064 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15065 %{
15066 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15067 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15068 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15069
15070 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15071 ins_encode %{
15072 int icnt2 = (int)$int_cnt2$$constant;
15073 if (icnt2 >= 8) {
15074 // IndexOf for constant substrings with size >= 8 elements
15075 // which don't need to be loaded through stack.
15076 __ string_indexofC8($str1$$Register, $str2$$Register,
15077 $cnt1$$Register, $cnt2$$Register,
15078 icnt2, $result$$Register,
15079 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15080 } else {
15081 // Small strings are loaded through stack if they cross page boundary.
15082 __ string_indexof($str1$$Register, $str2$$Register,
15083 $cnt1$$Register, $cnt2$$Register,
15084 icnt2, $result$$Register,
15085 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15086 }
15087 %}
15088 ins_pipe( pipe_slow );
15089 %}
15090
15091 // fast search of substring with known size.
// StrIndexOf with encoding UL where the substring length is an immI constant;
// requires UseSSE42Intrinsics.
// Operands are pinned: str1=rdi, cnt1=rdx, str2=rsi, result=rbx. cnt2 (rax)
// and tmp (rcx) are not matched inputs; they are only killed.
// The constant length picks the helper: string_indexofC8() for >= 8 elements,
// string_indexof() otherwise.
15092 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15093 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15094 %{
15095 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15096 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15097 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15098
15099 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15100 ins_encode %{
15101 int icnt2 = (int)$int_cnt2$$constant;
15102 if (icnt2 >= 8) {
15103 // IndexOf for constant substrings with size >= 8 elements
15104 // which don't need to be loaded through stack.
15105 __ string_indexofC8($str1$$Register, $str2$$Register,
15106 $cnt1$$Register, $cnt2$$Register,
15107 icnt2, $result$$Register,
15108 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15109 } else {
15110 // Small strings are loaded through stack if they cross page boundary.
15111 __ string_indexof($str1$$Register, $str2$$Register,
15112 $cnt1$$Register, $cnt2$$Register,
15113 icnt2, $result$$Register,
15114 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15115 }
15116 %}
15117 ins_pipe( pipe_slow );
15118 %}
15119
// StrIndexOf with encoding LL where the substring length is in a register
// (cnt2, pinned to rax); requires UseSSE42Intrinsics.
// Operands are pinned: str1=rdi, cnt1=rdx, str2=rsi, result=rbx, tmp=rcx.
// All four inputs are USE_KILL; tmp and the flags are killed; tmp_vec is an
// XMM temporary.
15120 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15121 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15122 %{
15123 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15124 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15125 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15126
15127 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15128 ins_encode %{
// -1 occupies the argument slot where the constant-length variants pass icnt2.
15129 __ string_indexof($str1$$Register, $str2$$Register,
15130 $cnt1$$Register, $cnt2$$Register,
15131 (-1), $result$$Register,
15132 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15133 %}
15134 ins_pipe( pipe_slow );
15135 %}
15136
// StrIndexOf with encoding UU where the substring length is in a register
// (cnt2, pinned to rax); requires UseSSE42Intrinsics.
// Operands are pinned: str1=rdi, cnt1=rdx, str2=rsi, result=rbx, tmp=rcx.
// All four inputs are USE_KILL; tmp and the flags are killed; tmp_vec is an
// XMM temporary.
15137 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15138 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15139 %{
15140 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15141 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15142 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15143
15144 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15145 ins_encode %{
// -1 occupies the argument slot where the constant-length variants pass icnt2.
15146 __ string_indexof($str1$$Register, $str2$$Register,
15147 $cnt1$$Register, $cnt2$$Register,
15148 (-1), $result$$Register,
15149 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15150 %}
15151 ins_pipe( pipe_slow );
15152 %}
15153
// StrIndexOf with encoding UL where the substring length is in a register
// (cnt2, pinned to rax); requires UseSSE42Intrinsics.
// Operands are pinned: str1=rdi, cnt1=rdx, str2=rsi, result=rbx, tmp=rcx.
// All four inputs are USE_KILL; tmp and the flags are killed; tmp_vec is an
// XMM temporary.
15154 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15155 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15156 %{
15157 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15158 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15159 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15160
15161 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15162 ins_encode %{
// -1 occupies the argument slot where the constant-length variants pass icnt2.
15163 __ string_indexof($str1$$Register, $str2$$Register,
15164 $cnt1$$Register, $cnt2$$Register,
15165 (-1), $result$$Register,
15166 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15167 %}
15168 ins_pipe( pipe_slow );
15169 %}
15170
// StrIndexOfChar with encoding U; requires UseSSE42Intrinsics.
// Operands are pinned: str1=rdi, cnt1=rdx, ch=rax, result=rbx, tmp=rcx.
// str1, cnt1 and ch are USE_KILL; tmp and three XMM registers are
// temporaries; flags are killed.
// Delegates to string_indexof_char().
15171 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15172 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15173 %{
15174 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15175 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15176 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15177 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15178 ins_encode %{
15179 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15180 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15181 %}
15182 ins_pipe( pipe_slow );
15183 %}
15184
// StrIndexOfChar with encoding L; requires UseSSE42Intrinsics.
// Operands are pinned: str1=rdi, cnt1=rdx, ch=rax, result=rbx, tmp=rcx.
// str1, cnt1 and ch are USE_KILL; tmp and three XMM registers are
// temporaries; flags are killed.
// Delegates to stringL_indexof_char().
15185 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15186 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15187 %{
15188 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15189 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15190 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15191 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15192 ins_encode %{
15193 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15194 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15195 %}
15196 ins_pipe( pipe_slow );
15197 %}
15198
15199 // fast string equals
// StrEquals, selected when AVX512VL+BW is not reported.
// Operands are pinned: str1=rdi, str2=rsi, cnt=rcx, result=rax, tmp3=rbx.
// str1, str2 and cnt are USE_KILL; tmp3 and flags are killed; tmp1/tmp2 are
// XMM temporaries.
// Delegates to arrays_equals() with 'false' as the first argument (the
// AryEq rules in this file pass 'true'), passing cnt as the length register,
// 'false' for the char flag and knoreg for the mask register.
15200 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15201 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15202 %{
15203 predicate(!VM_Version::supports_avx512vlbw());
15204 match(Set result (StrEquals (Binary str1 str2) cnt));
15205 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15206
15207 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15208 ins_encode %{
15209 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15210 $cnt$$Register, $result$$Register, $tmp3$$Register,
15211 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15212 %}
15213 ins_pipe( pipe_slow );
15214 %}
15215
// StrEquals, selected when AVX512VL+BW is reported.
// Same pinned operands as string_equals (str1=rdi, str2=rsi, cnt=rcx,
// result=rax, tmp3=rbx) plus a kReg temporary ktmp, which is passed to
// arrays_equals() as the mask register.
15216 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15217 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15218 %{
15219 predicate(VM_Version::supports_avx512vlbw());
15220 match(Set result (StrEquals (Binary str1 str2) cnt));
15221 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15222
15223 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15224 ins_encode %{
15225 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15226 $cnt$$Register, $result$$Register, $tmp3$$Register,
15227 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15228 %}
15229 ins_pipe( pipe_slow );
15230 %}
15231
15232 // fast array equals
// AryEq with encoding LL, selected when AVX512VL+BW is not reported.
// Operands are pinned: ary1=rdi, ary2=rsi, result=rax, tmp3=rcx, tmp4=rbx.
// ary1 and ary2 are USE_KILL; tmp3, tmp4 and flags are killed; tmp1/tmp2 are
// XMM temporaries.
// Delegates to arrays_equals() with 'true' as the first argument; tmp3 fills
// the slot where string_equals passes its cnt register. The char flag is
// 'false' and the mask register is knoreg.
15233 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15234 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15235 %{
15236 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15237 match(Set result (AryEq ary1 ary2));
15238 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15239
15240 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15241 ins_encode %{
15242 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15243 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15244 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15245 %}
15246 ins_pipe( pipe_slow );
15247 %}
15248
// AryEq with encoding LL, selected when AVX512VL+BW is reported.
// Same pinned operands as array_equalsB (ary1=rdi, ary2=rsi, result=rax,
// tmp3=rcx, tmp4=rbx) plus a kReg temporary ktmp, which is passed to
// arrays_equals() as the mask register. The char flag is 'false'.
15249 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15250 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15251 %{
15252 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15253 match(Set result (AryEq ary1 ary2));
15254 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15255
15256 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15257 ins_encode %{
15258 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15259 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15260 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15261 %}
15262 ins_pipe( pipe_slow );
15263 %}
15264
// AryEq with encoding UU, selected when AVX512VL+BW is not reported.
// Operands are pinned: ary1=rdi, ary2=rsi, result=rax, tmp3=rcx, tmp4=rbx.
// ary1 and ary2 are USE_KILL; tmp3, tmp4 and flags are killed; tmp1/tmp2 are
// XMM temporaries.
// Delegates to arrays_equals() with the char flag set to 'true' and knoreg
// as the mask register.
15265 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15266 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15267 %{
15268 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15269 match(Set result (AryEq ary1 ary2));
15270 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15271
15272 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15273 ins_encode %{
15274 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15275 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15276 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15277 %}
15278 ins_pipe( pipe_slow );
15279 %}
15280
// AryEq with encoding UU, selected when AVX512VL+BW is reported.
// Same pinned operands as array_equalsC (ary1=rdi, ary2=rsi, result=rax,
// tmp3=rcx, tmp4=rbx) plus a kReg temporary ktmp, which is passed to
// arrays_equals() as the mask register. The char flag is 'true'.
15281 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15282 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15283 %{
15284 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15285 match(Set result (AryEq ary1 ary2));
15286 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15287
15288 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15289 ins_encode %{
15290 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15291 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15292 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15293 %}
15294 ins_pipe( pipe_slow );
15295 %}
15296
// VectorizedHashCode; requires UseAVX >= 2.
// Operands: ary1=rdi and cnt1=rdx (both USE_KILL), result=rbx, basic_type is
// an immU8 constant. 'result' appears on both sides of the match rule, so it
// is an input as well as the output.
// Temporaries: thirteen legRegD vector registers and three rRegI integer
// registers; flags are killed.
// The encoding forwards everything to arrays_hashcode(), casting the
// basic_type constant to BasicType.
15297 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15298 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15299 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15300 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15301 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15302 %{
15303 predicate(UseAVX >= 2);
15304 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15305 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15306 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15307 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15308 USE basic_type, KILL cr);
15309
15310 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15311 ins_encode %{
15312 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15313 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15314 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15315 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15316 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15317 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15318 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15319 %}
15320 ins_pipe( pipe_slow );
15321 %}
15322
// CountPositives, selected unless both AVX512VL+BW and BMI2 are reported.
// Operands are pinned: ary1=rsi, len=rcx, result=rax, tmp3=rbx.
// ary1 and len are USE_KILL; tmp3 and flags are killed; tmp1/tmp2 are XMM
// temporaries.
// Delegates to count_positives() with knoreg for both mask-register arguments.
// The operand list no longer ends with a stray comma before ')'.
15323 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15324 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15325 %{
15326 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15327 match(Set result (CountPositives ary1 len));
15328 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15329
15330 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15331 ins_encode %{
15332 __ count_positives($ary1$$Register, $len$$Register,
15333 $result$$Register, $tmp3$$Register,
15334 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15335 %}
15336 ins_pipe( pipe_slow );
15337 %}
15338
// CountPositives, selected when both AVX512VL+BW and BMI2 are reported.
// Operands are pinned: ary1=rsi, len=rcx, result=rax, tmp3=rbx.
// ary1 and len are USE_KILL; tmp3 and flags are killed; tmp1/tmp2 are XMM
// temporaries and ktmp1/ktmp2 are kReg temporaries, both of which are passed
// to count_positives() as its mask-register arguments.
// The operand list no longer ends with a stray comma before ')'.
15339 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15340 legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15341 %{
15342 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15343 match(Set result (CountPositives ary1 len));
15344 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15345
15346 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15347 ins_encode %{
15348 __ count_positives($ary1$$Register, $len$$Register,
15349 $result$$Register, $tmp3$$Register,
15350 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15351 %}
15352 ins_pipe( pipe_slow );
15353 %}
15354
15355 // fast char[] to byte[] compression
// StrCompressedCopy, selected unless both AVX512VL+BW and BMI2 are reported.
// Operands are pinned: src=rsi, dst=rdi, len=rdx, tmp5=rcx, result=rax.
// src, dst and len are USE_KILL; tmp5 and flags are killed; tmp1..tmp4 are
// XMM temporaries.
// Delegates to char_array_compress() with knoreg for both mask-register
// arguments.
15356 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15357 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15358 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15359 match(Set result (StrCompressedCopy src (Binary dst len)));
15360 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15361 USE_KILL len, KILL tmp5, KILL cr);
15362
15363 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15364 ins_encode %{
15365 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15366 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15367 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15368 knoreg, knoreg);
15369 %}
15370 ins_pipe( pipe_slow );
15371 %}
15372
// StrCompressedCopy, selected when both AVX512VL+BW and BMI2 are reported.
// Same pinned operands as string_compress (src=rsi, dst=rdi, len=rdx,
// tmp5=rcx, result=rax) plus kReg temporaries ktmp1/ktmp2, which are passed
// to char_array_compress() as its mask-register arguments.
15373 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15374 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15375 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15376 match(Set result (StrCompressedCopy src (Binary dst len)));
15377 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15378 USE_KILL len, KILL tmp5, KILL cr);
15379
15380 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15381 ins_encode %{
15382 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15383 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15384 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15385 $ktmp1$$KRegister, $ktmp2$$KRegister);
15386 %}
15387 ins_pipe( pipe_slow );
15388 %}
// Fast byte[] to char[] inflation (StrInflatedCopy).
// Used when AVX512VL/BW and BMI2 are not both available; no opmask register
// is supplied to the macro assembler routine (knoreg).
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  predicate(!(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register,
                          knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15403
// StrInflatedCopy variant for CPUs with AVX512VL/BW and BMI2; passes one
// opmask temporary to MacroAssembler::byte_array_inflate.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register,
                          $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15417
// Encode char[] to byte[] in ISO_8859_1.
// Matches EncodeISOArray nodes that are not flagged as ASCII; the final
// argument to MacroAssembler::encode_iso_array is therefore false.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                        $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                        $tmp5$$Register, $result$$Register,
                        false);
  %}
  ins_pipe( pipe_slow );
%}
15434
// Encode char[] to byte[] in ASCII.
// Matches EncodeISOArray nodes flagged as ASCII and calls the shared
// MacroAssembler::encode_iso_array routine with its final argument set to true.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                        $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                        $tmp5$$Register, $result$$Register,
                        true);
  %}
  ins_pipe( pipe_slow );
%}
15451
15452 //----------Overflow Math Instructions-----------------------------------------
15453
// OverflowAddI, register + register: the addl is issued for the flags it
// produces. op1 is an rax_RegI operand and is overwritten (USE_KILL).
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register lhs = $op1$$Register;  // clobbered by the add
    Register rhs = $op2$$Register;
    __ addl(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
15466
// OverflowAddI, register + int immediate. op1 (rax_RegI) is overwritten by
// the addl; only the resulting flags are the node's value.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register lhs = $op1$$Register;  // clobbered by the add
    __ addl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15479
// OverflowAddL, register + register: 64-bit add issued for its flags.
// op1 (rax_RegL) is overwritten (USE_KILL).
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register lhs = $op1$$Register;  // clobbered by the add
    Register rhs = $op2$$Register;
    __ addq(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
15491
// OverflowAddL, register + 32-bit long immediate. op1 (rax_RegL) is
// overwritten by the addq.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register lhs = $op1$$Register;  // clobbered by the add
    __ addq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15503
// OverflowSubI, register - register. Emitted as cmpl, so neither input
// register is written and no register pinning is required.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
15514
// OverflowSubI, register - int immediate, emitted as a cmpl against the
// constant.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15525
// OverflowSubL, register - register, emitted as cmpq so both inputs are
// left untouched.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
15536
// OverflowSubL, register - 32-bit long immediate, emitted as a cmpq against
// the constant.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15547
// OverflowSubI with a constant-zero left operand, i.e. an int negation.
// Emitted as negl on op2 (rax_RegI), which is overwritten (USE_KILL).
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2) %{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    Register value = $op2$$Register;  // negated in place
    __ negl(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
15559
// OverflowSubL with a constant-zero left operand, i.e. a long negation.
// Emitted as negq on op2 (rax_RegL), which is overwritten (USE_KILL).
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) %{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    Register value = $op2$$Register;  // negated in place
    __ negq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
15571
// OverflowMulI, register * register: two-operand imull issued for its
// flags. op1 (rax_RegI) receives the product and is therefore USE_KILL.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register lhs = $op1$$Register;  // clobbered by the multiply
    Register rhs = $op2$$Register;
    __ imull(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15583
// OverflowMulI, register * int immediate: three-operand imull writes the
// product into a scratch register, so op1 is only read.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register scratch = $tmp$$Register;  // receives the discarded product
    Register lhs     = $op1$$Register;
    __ imull(scratch, lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15595
// OverflowMulL, register * register: two-operand imulq issued for its
// flags. op1 (rax_RegL) receives the product and is therefore USE_KILL.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register lhs = $op1$$Register;  // clobbered by the multiply
    Register rhs = $op2$$Register;
    __ imulq(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15607
// OverflowMulL, register * 32-bit long immediate: three-operand imulq
// writes the product into a scratch register, so op1 is only read.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register scratch = $tmp$$Register;  // receives the discarded product
    Register lhs     = $op1$$Register;
    __ imulq(scratch, lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15619
15620
15621 //----------Control Flow Instructions------------------------------------------
15622 // Signed compare Instructions
15623
15624 // XXX more variants!!
// Signed int compare, register vs register (CmpI). Also referenced by name
// from the min/max expand rules in this file.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15636
// Signed int compare, register vs int immediate (CmpI).
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15647
// Signed int compare, register vs memory: folds the LoadI into the cmpl.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) %{
  ins_cost(500); // XXX
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15659
// Int compare against constant zero, emitted as testl of the register with
// itself instead of a cmpl with an immediate.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15670
// (src & con) compared with zero: the AndI is folded into a single testl
// with an immediate mask.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15681
// (src1 & src2) compared with zero: the AndI is folded into a single
// register/register testl.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero) %{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    __ testl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15692
// (src & LoadI mem) compared with zero: both the load and the AndI are
// folded into a single testl with a memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15703
15704 // Unsigned compare Instructions; really, same as signed except they
15705 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register vs register (CmpU); the result is typed
// as rFlagsRegU.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15716
// Unsigned int compare, register vs int immediate (CmpU).
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15727
// Unsigned int compare, register vs memory: folds the LoadI into the cmpl.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) %{
  ins_cost(500); // XXX
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15739
// Unsigned int compare against constant zero, emitted as testl of the
// register with itself.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15750
// Pointer compare, register vs register (CmpP); 64-bit cmpq producing
// unsigned flags.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15761
// Pointer compare, register vs memory: folds the LoadP into the cmpq.
// Only applies when the load carries no GC barrier data.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15774
15775 // XXX this is generalized by compP_rReg_mem???
15776 // Compare raw pointer (used in out-of-heap check).
15777 // Only works because non-oop pointers must be raw pointers
15778 // and raw pointers have no anti-dependencies.
// Raw-pointer form of the register/memory pointer compare: requires the
// loaded address to have a raw pointer type and the load to carry no
// barrier data. Declares no explicit ins_cost.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
            n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15791
15792 // This will generate a signed flags result. This should be OK since
15793 // any compare to a zero should be eq/neq.
// Pointer null check on a register: testq of the register with itself.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) %{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    Register ptr = $src$$Register;
    __ testq(ptr, ptr);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15804
15805 // This will generate a signed flags result. This should be OK since
15806 // any compare to a zero should be eq/neq.
// Pointer null check directly on memory (testq with an immediate mask).
// Applies when compressed oops are off or the heap base is non-null, and
// the load carries no barrier data; testP_mem_reg0 covers the remaining
// zero-base case.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{
  match(Set cr (CmpP (LoadP op) zero));
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    // The format prints the mask as sixteen f's; the operand passed is 0xFFFFFFFF.
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15820
// Pointer null check on memory for compressed oops with a null heap base:
// compares the memory word against R12 (the format notes R12_heapbase==0).
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  match(Set cr (CmpP (LoadP mem) zero));
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpq(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
15833
// Compressed pointer compare, register vs register (CmpN), 32-bit cmpl.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15842
// Compressed pointer compare, register vs memory: folds the LoadN into the
// cmpl. Only applies when the load carries no barrier data.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  match(Set cr (CmpN src (LoadN mem)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    __ cmpl(narrow, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15854
// Compressed pointer compare, register vs narrow-oop constant; delegates to
// MacroAssembler::cmp_narrow_oop.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $op1$$Register;
    __ cmp_narrow_oop(narrow, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15864
// Compressed pointer compare, memory vs narrow-oop constant: folds the
// LoadN into cmp_narrow_oop. Only applies when the load carries no barrier
// data.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  match(Set cr (CmpN src (LoadN mem)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
15876
// Compressed klass pointer compare, register vs narrow-klass constant;
// delegates to MacroAssembler::cmp_narrow_klass.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    Register narrow = $op1$$Register;
    __ cmp_narrow_klass(narrow, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15886
// Compressed klass pointer compare, memory vs narrow-klass constant: folds
// the LoadNKlass into cmp_narrow_klass. Disabled when
// UseCompactObjectHeaders is set.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  match(Set cr (CmpN src (LoadNKlass mem)));
  predicate(!UseCompactObjectHeaders);

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
15898
// Compressed pointer null check on a register: testl of the register with
// itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    __ testl(narrow, narrow);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15906
// Compressed pointer null check directly on memory, for the case where the
// compressed-oops heap base is non-null and the load carries no barrier
// data (testN_mem_reg0 covers the zero-base case).
//
// The node compares the loaded narrow oop with zero, so the flags must
// reflect "mem == 0". That is what testl mem, 0xFFFFFFFF yields (ZF set iff
// all bits are clear) and what the format string advertises. A cmpl against
// the same immediate would instead set ZF only when the word equals
// 0xFFFFFFFF.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15920
// Compressed pointer null check on memory when the compressed-oops heap
// base is null: compares the memory word against R12 (the format notes
// R12_heapbase==0).
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  match(Set cr (CmpN (LoadN mem) zero));
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpl(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
15933
15934 // Yanked all unsigned pointer compare operations.
15935 // Pointer compares are done with CmpP which is already unsigned.
15936
// Signed long compare, register vs register (CmpL).
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15947
// Signed long compare, register vs 32-bit long immediate (CmpL).
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15958
// Signed long compare, register vs memory: folds the LoadL into the cmpq.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15969
// Long compare against constant zero, emitted as testq of the register
// with itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15980
// (src & con) compared with zero for longs: the AndL is folded into a
// single testq with a 32-bit immediate mask.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15991
// (src1 & src2) compared with zero for longs: the AndL is folded into a
// single register/register testq.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero) %{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    __ testq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16002
// (src & LoadL mem) compared with zero: both the load and the AndL are
// folded into a single testq with a memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16013
// Same as testL_reg_mem, but the register operand is a pointer viewed as a
// long through CastP2X; the cast is absorbed by the match rule.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    Register ptr_bits = $src$$Register;
    __ testq(ptr_bits, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16024
// Manifest a CmpU result in an integer register. Very painful.
// This is the test to avoid.
//
// Emitted sequence: compare, preload dst with -1, and branch to done if
// src1 is below src2 (unsigned). Otherwise setcc(notZero) overwrites dst
// from the still-live compare flags (the movl does not modify flags).
// The flags register is clobbered (KILL flags).
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16048
// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
//
// Emitted sequence: 64-bit compare, preload dst with -1, and branch to done
// if src1 is less than src2 (signed). Otherwise setcc(notZero) overwrites
// dst from the still-live compare flags (the movl does not modify flags).
// The flags register is clobbered (KILL flags).
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16072
// Manifest a CmpUL result in an integer register. Very painful.
// This is the test to avoid.
//
// Emitted sequence: 64-bit compare, preload dst with -1, and branch to done
// if src1 is below src2 (unsigned). Otherwise setcc(notZero) overwrites dst
// from the still-live compare flags (the movl does not modify flags).
// The flags register is clobbered (KILL flags).
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16096
16097 // Unsigned long compare Instructions; really, same as signed long except they
16098 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register vs register (CmpUL); the result is typed
// as rFlagsRegU.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16109
// Unsigned long compare, register vs 32-bit long immediate (CmpUL).
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16120
// Unsigned long compare, register vs memory: folds the LoadL into the cmpq.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) %{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16131
// Unsigned long compare against constant zero, emitted as testq of the
// register with itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16142
// Signed byte in memory compared with an 8-bit immediate: the LoadB is
// folded into a cmpb on the memory operand.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) %{
  ins_cost(125);
  match(Set cr (CmpI (LoadB mem) imm));

  format %{ "cmpb $mem, $imm" %}
  ins_encode %{
    __ cmpb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16152
// (LoadUB mem & imm) compared with zero, with imm restricted to an immU7
// operand: folded into a testb on the memory operand.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero) %{
  ins_cost(125);
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16162
// (LoadB mem & imm) compared with zero, with imm an immI8 operand: folded
// into a testb on the memory operand.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero) %{
  ins_cost(125);
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16172
16173 //----------Max and Min--------------------------------------------------------
16174 // Min Instructions
16175
// Helper with no match rule, used by the minI_rReg expansion (non-APX):
// replaces dst with src when the preceding compare found dst greater.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    Register acc   = $dst$$Register;
    Register other = $src$$Register;
    __ cmovl(Assembler::greater, acc, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16187
// Helper with no match rule, used by the minI_rReg_ndd expansion (APX):
// three-operand ecmovl on the "greater" condition with a separate
// destination register.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    Register out = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    __ ecmovl(Assembler::greater, out, lhs, rhs);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16199
// MinI for non-APX targets (two-operand form, dst is also the first input).
// Expands into compI_rReg followed by cmovI_reg_g.
instruct minI_rReg(rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  ins_cost(200);
  match(Set dst (MinI dst src));

  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
16212
// MinI for APX targets (separate destination register). Expands into
// compI_rReg followed by cmovI_reg_g_ndd and is tagged with
// Flag_ndd_demotable_opr1.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  ins_cost(200);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}
16227
// Helper with no match rule, used by the maxI_rReg expansion (non-APX):
// replaces dst with src when the preceding compare found dst less.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    Register acc   = $dst$$Register;
    Register other = $src$$Register;
    __ cmovl(Assembler::less, acc, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16239
// Helper with no match rule, used by the maxI_rReg_ndd expansion (APX):
// three-operand ecmovl on the "less" condition with a separate destination
// register.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    Register out = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    __ ecmovl(Assembler::less, out, lhs, rhs);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16251
// MaxI for non-APX targets (two-operand form, dst is also the first input).
// Expands into compI_rReg followed by cmovI_reg_l.
instruct maxI_rReg(rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  ins_cost(200);
  match(Set dst (MaxI dst src));

  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
16264
// MaxI for APX targets (separate destination register). Expands into
// compI_rReg followed by cmovI_reg_l_ndd and is tagged with
// Flag_ndd_demotable_opr1.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  ins_cost(200);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
16279
16280 // ============================================================================
16281 // Branch Instructions
16282
// Jump Direct - Label defines a relative address from JMP+1.
// Unconditional Goto; the long jmp form is always requested, matching the
// declared size of 5 bytes.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  size(5);
  format %{ "jmp $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmp(*target, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
16298
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Conditional branch on signed flags (If); the long jcc form is always
// requested, matching the declared size of 6 bytes.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  size(6);
  format %{ "j$cop $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16314
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Back-branch of a counted loop (CountedLoopEnd); the long jcc form is
// always requested, matching the declared size of 6 bytes.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  size(6);
  format %{ "j$cop $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16330
// Jump Direct Conditional - using unsigned comparison.
// Same encoding as jmpCon, but keyed on cmpOpU / rFlagsRegU operands.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  size(6);
  format %{ "j$cop,u $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16345
// Conditional branch keyed on cmpOpUCF / rFlagsRegUCF operands. Single
// long jcc; declared cheaper (200) than the plain unsigned form.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  size(6);
  format %{ "j$cop,u $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16359
// Conditional branch keyed on cmpOpUCF2 / rFlagsRegUCF operands, for
// eq / ne tests that must also consult the parity flag:
//   notEqual: branch to the target on parity OR on notEqual;
//   equal:    skip the branch on parity, otherwise branch on equal.
// Any other condition code is rejected with ShouldNotReachHere().
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    if ($cop$$cmpcode == Assembler::equal) {
      // Parity set: fall through without taking the branch.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jcc(Assembler::equal, *target, false);
      __ bind(skip);
    } else if ($cop$$cmpcode == Assembler::notEqual) {
      // Parity set is treated as "not equal" as well.
      __ jcc(Assembler::parity, *target, false);
      __ jcc(Assembler::notEqual, *target, false);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16391
// Jump Direct Conditional - using signed and unsigned comparison.
// Keyed on cmpOpUCFE / rFlagsRegUCFE operands; single long jcc with a
// declared size of 6 bytes.
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  size(6);
  format %{ "j$cop,su $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16406
16407 // ============================================================================
16408 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16409 // superklass array for an instance of the superklass. Set a hidden
16410 // internal cache on a hit (cache is checked with exposed code in
16411 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16412 // encoding ALSO sets flags.
16413
// Linear-scan variant of PartialSubtypeCheck, selected only when
// UseSecondarySupersTable is off.
//   result (rdi): zero on a hit; left as set by the helper on a miss.
//   sub (rsi), super (rax): inputs, pinned to the registers named by their
//                           operand classes.
//   rcx and the flags are killed.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  // Debug-print text only. The "Hit: rdi zero" note now carries the same
  // '#' comment marker as the other lines.
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t# Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    // NOTE(review): the nullptr / &miss pair is presumably (success, failure),
    // i.e. fall through on a hit and branch to 'miss' otherwise -- confirm
    // against the helper's declaration.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Fall-through path: clear the result register.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
16447
16448 // ============================================================================
16449 // Two versions of hashtable-based partialSubtypeCheck, both used when
16450 // we need to search for a super class in the secondary supers array.
16451 // The first is used when we don't know _a priori_ the class being
16452 // searched for. The second, far more common, is used when we do know:
16453 // this is used for instanceof, checkcast, and any case where C2 can
16454 // determine it by constant propagation.
16455
// Table-based PartialSubtypeCheck with the super class held only in a register
// (selected when UseSecondarySupersTable is on). Four temps and the flags are
// clobbered.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super" %}
  ins_cost(1000);

  ins_encode %{
    const Register sub_klass   = $sub$$Register;
    const Register super_klass = $super$$Register;
    const Register scratch1    = $temp1$$Register;
    const Register scratch2    = $temp2$$Register;
    const Register scratch3    = $temp3$$Register;
    const Register scratch4    = $temp4$$Register;
    const Register outcome     = $result$$Register;
    __ lookup_secondary_supers_table_var(sub_klass, super_klass, scratch1, scratch2,
                                         scratch3, scratch4, outcome);
  %}

  ins_pipe(pipe_slow);
%}
16474
// Table-based PartialSubtypeCheck where the super class is also available as
// a constant (super_con), so its hash slot can be computed while emitting code.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
  ins_cost(700);  // cheaper than the variable-super rule (cost 1000)

  ins_encode %{
    u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      // Out-of-line: call the stub selected by the hash slot.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
    } else {
      // Inline the table lookup at this site.
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register,
                                             $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register,
                                             $result$$Register,
                                             super_klass_slot);
    }
  %}

  ins_pipe(pipe_slow);
%}
16499
16500 // ============================================================================
16501 // Branch Instructions -- short offset versions
16502 //
16503 // These instructions are used to replace jumps of a long offset (the default
16504 // match) with jumps of a shorter offset. These instructions are all tagged
16505 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16506 // match rules in general matching. Instead, the ADLC generates a conversion
16507 // method in the MachNode which can be used to do in-place replacement of the
16508 // long variant with the shorter variant. The compiler will determine if a
16509 // branch can be taken by the is_short_branch_offset() predicate in the machine
16510 // specific code section of the file.
16511
16512 // Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  // Two-byte unconditional jump; tagged as a short-branch variant below.
  format %{ "jmp,s $labl" %}
  size(2);
  ins_cost(300);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
16527
16528 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  // Two-byte conditional jump; tagged as a short-branch variant below.
  format %{ "j$cop,s $labl" %}
  size(2);
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16543
16544 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  // Short-branch variant for the CountedLoopEnd back-branch (two bytes).
  format %{ "j$cop,s $labl\t# loop end" %}
  size(2);
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16559
16560 // Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  // Two-byte conditional jump; tagged as a short-branch variant below.
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16575
// Short-branch (two-byte) conditional jump for cmpOpUCF conditions.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  format %{ "j$cop,us $labl" %}
  size(2);
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16590
// Short-branch form of the two-jump cmpOpUCF2 sequence: two short jumps,
// four bytes in total.
//   notEqual: jump to the target on parity, then on notEqual.
//   equal:    hop over the equal-jump when parity is set.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        __ jccb(Assembler::parity, *target);
        __ jccb(Assembler::notEqual, *target);
        break;
      case Assembler::equal: {
        // Parity set: bypass the equal-jump via a local label.
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jccb(Assembler::equal, *target);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16624
16625 // Jump Direct Conditional - using signed and unsigned comparison
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  // Two-byte conditional jump; tagged as a short-branch variant below.
  format %{ "j$cop,sus $labl" %}
  size(2);
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16640
16641 // ============================================================================
16642 // inlined locking and unlocking
16643
// Inlined monitor enter: the FastLock outcome is delivered in the flags (cr).
// box (rbx) is consumed and killed; rax_reg and tmp are scratch.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    const Register obj      = $object$$Register;
    const Register lock_box = $box$$Register;
    const Register rax_tmp  = $rax_reg$$Register;
    const Register scratch  = $tmp$$Register;
    __ fast_lock(obj, lock_box, rax_tmp, scratch, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16654
// Inlined monitor exit: the FastUnlock outcome is delivered in the flags (cr).
// rax_reg is consumed and killed; tmp is scratch.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    const Register obj     = $object$$Register;
    const Register rax_tmp = $rax_reg$$Register;
    const Register scratch = $tmp$$Register;
    __ fast_unlock(obj, rax_tmp, scratch, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16665
16666
16667 // ============================================================================
16668 // Safepoint Instructions
// Safepoint poll: a 'testl rax, [$poll]' read through the polling address held
// in $poll. Only the flags are killed; $poll is read.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    // Record the poll relocation before the instruction it describes.
    __ relocate(relocInfo::poll_type);
    // Remember where the poll starts so the assert below can decode it.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // Debug check: what was just emitted must be recognized as a safepoint poll.
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
16685
// MaskAll from a long source into a k-register, sized by this node's vector
// length.
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    const int vlen = Matcher::vector_length(this);
    const KRegister mask = $dst$$KRegister;
    const Register  bits = $src$$Register;
    __ vector_maskall_operation(mask, bits, vlen);
  %}
  ins_pipe( pipe_slow );
%}
16695
// MaskAll from an int source for vector lengths above 32: the int is first
// sign-extended into a 64-bit temp (movslq), which then feeds the mask
// operation.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    const int vlen = Matcher::vector_length(this);
    const Register widened = $tmp$$Register;
    __ movslq(widened, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, widened, vlen);
  %}
  ins_pipe( pipe_slow );
%}
16708
16709 // ============================================================================
16710 // Procedure Call/Return Instructions
16711 // Call Java Static Instruction
16712 // Note: If this code changes, the corresponding ret_addr_offset() and
16713 // compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  // Encoding is the enc_class sequence: clear_avx, the static Java call
  // itself, then call_epilog.
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  // NOTE(review): presumably the alignment that compute_padding() has to
  // satisfy for this call site -- confirm.
  ins_alignment(4);
%}
16725
16726 // Call Java Dynamic Instruction
16727 // Note: If this code changes, the corresponding ret_addr_offset() and
16728 // compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // The format shows rax being loaded with Universe::non_oop_word() ahead of
  // the call.
  format %{ "movq rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  // Encoding is the enc_class sequence: clear_avx, the dynamic Java call
  // itself, then call_epilog.
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  // NOTE(review): presumably the alignment that compute_padding() has to
  // satisfy for this call site -- confirm.
  ins_alignment(4);
%}
16741
16742 // Call Runtime Instruction
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  // Encoding: clear_avx followed by the Java_To_Runtime call sequence.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16753
16754 // Call runtime without safepoint
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  // Encoding: clear_avx followed by the Java_To_Runtime call sequence.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16765
16766 // Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Only the Java_To_Runtime sequence is emitted; this encoding has no
  // clear_avx step.
  // NOTE(review): presumably because the vector arguments must stay intact
  // across the call -- confirm.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16777
16778 // Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // Encoding: clear_avx followed by the Java_To_Runtime call sequence.
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16789
16790 // Return Instruction
16791 // Remove the return address & jump to it.
16792 // Notice: We always emit a nop after a ret to make sure there is room
16793 // for safepoint patching
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    // The whole encoding is a single ret with immediate operand 0.
    const int ret_imm = 0;
    __ ret(ret_imm);
  %}
  ins_pipe(pipe_jmp);
%}
16804
16805 // Tail Call; Jump from runtime stub to Java code.
16806 // Also known as an 'interprocedural jump'.
16807 // Target of jump will eventually return to caller.
16808 // TailJump below removes the return address.
16809 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
16810 // emitted just above the TailCall which has reset rbp to the caller state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  format %{ "jmp $jump_target\t# rbx holds method" %}
  ins_cost(300);
  ins_encode %{
    // method_ptr is pinned to rbx by its operand class and is not referenced
    // here; only the indirect jump is emitted.
    const Register dest = $jump_target$$Register;
    __ jmp(dest);
  %}
  ins_pipe(pipe_jmp);
%}
16822
16823 // Tail Jump; remove the return address; jump to target.
16824 // TailCall above leaves the return address around.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  format %{ "popq rdx\t# pop return address\n\t"
            "jmp $jump_target" %}
  ins_cost(300);
  ins_encode %{
    // Pop the return address into rdx, then jump indirectly to the target.
    const Register popped_ret = as_Register(RDX_enc);
    const Register dest       = $jump_target$$Register;
    __ popq(popped_ret);
    __ jmp(dest);
  %}
  ins_pipe(pipe_jmp);
%}
16838
16839 // Forward exception.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp forward_exception_stub" %}
  ins_encode %{
    // Jump (not call) to the forward-exception stub entry; noreg is passed as
    // the scratch register.
    const address stub_entry = StubRoutines::forward_exception_entry();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
16850
16851 // Create exception oop: created by stack-crawling runtime code.
16852 // Created exception is now available to this handler, and is setup
16853 // just prior to jumping to this handler. No code emitted.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  // Zero-size rule: the result operand is pinned to rax and the encoding
  // below is empty.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
16864
16865 // Rethrow exception:
16866 // The exception oop will come in the first argument position.
16867 // Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp rethrow_stub" %}
  ins_encode %{
    // A jump, not a call; noreg is passed as the scratch register.
    const address stub_entry = OptoRuntime::rethrow_stub();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
16879
16880 // ============================================================================
16881 // This name is KNOWN by the ADLC and cannot be changed.
16882 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16883 // for this guy.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  // Zero-size rule: dst is pinned to r15 by its operand class, so the
  // encoding is empty.
  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
16893
// Float add, non-AVX form (UseAVX == 0): dst = dst + src, both in registers.
instruct addF_reg(regF dst, regF src) %{
  match(Set dst (AddF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    const XMMRegister rhs = $src$$XMMRegister;
    __ addss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
16905
// Float add, non-AVX form (UseAVX == 0): dst = dst + (LoadF src), with the
// load folded into the instruction's memory operand.
instruct addF_mem(regF dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    __ addss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16917
// Float add, non-AVX form (UseAVX == 0): dst = dst + con, with the constant
// addressed through $constantaddress.
instruct addF_imm(regF dst, immF con) %{
  match(Set dst (AddF dst con));
  predicate(UseAVX == 0);
  ins_cost(150);
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    __ addss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16928
// Float add, AVX three-operand form (UseAVX > 0): dst = src1 + src2.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vaddss(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
16940
// Float add, AVX form (UseAVX > 0): dst = src1 + (LoadF src2), with the load
// folded into the memory operand.
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (AddF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    __ vaddss(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16952
// Float add, AVX form (UseAVX > 0): dst = src + con, with the constant
// addressed through $constantaddress.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (AddF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src$$XMMRegister;
    __ vaddss(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16964
// Double add, non-AVX form (UseAVX == 0): dst = dst + src, both in registers.
instruct addD_reg(regD dst, regD src) %{
  match(Set dst (AddD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    const XMMRegister rhs = $src$$XMMRegister;
    __ addsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
16976
// Double add, non-AVX form (UseAVX == 0): dst = dst + (LoadD src), with the
// load folded into the instruction's memory operand.
instruct addD_mem(regD dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    __ addsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16988
// Double add, non-AVX form (UseAVX == 0): dst = dst + con, with the constant
// addressed through $constantaddress.
instruct addD_imm(regD dst, immD con) %{
  match(Set dst (AddD dst con));
  predicate(UseAVX == 0);
  ins_cost(150);
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    __ addsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16999
// Double add, AVX three-operand form (UseAVX > 0): dst = src1 + src2.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (AddD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vaddsd(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17011
// Double add, AVX form (UseAVX > 0): dst = src1 + (LoadD src2), with the load
// folded into the memory operand.
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (AddD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    __ vaddsd(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17023
// Double add, AVX form (UseAVX > 0): dst = src + con, with the constant
// addressed through $constantaddress.
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (AddD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src$$XMMRegister;
    __ vaddsd(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17035
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - src, both in
// registers.
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // minuend and result
    const XMMRegister rhs = $src$$XMMRegister;
    __ subss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17047
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - (LoadF src), with
// the load folded into the instruction's memory operand.
instruct subF_mem(regF dst, memory src) %{
  match(Set dst (SubF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // minuend and result
    __ subss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17059
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - con, with the
// constant addressed through $constantaddress.
instruct subF_imm(regF dst, immF con) %{
  match(Set dst (SubF dst con));
  predicate(UseAVX == 0);
  ins_cost(150);
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // minuend and result
    __ subss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17070
// Float subtract, AVX three-operand form (UseAVX > 0): dst = src1 - src2.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (SubF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vsubss(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17082
// Float subtract, AVX form (UseAVX > 0): dst = src1 - (LoadF src2), with the
// load folded into the memory operand.
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (SubF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    __ vsubss(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17094
// Float subtract, AVX form (UseAVX > 0): dst = src - con, with the constant
// addressed through $constantaddress.
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (SubF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src$$XMMRegister;
    __ vsubss(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17106
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - src, both in
// registers.
instruct subD_reg(regD dst, regD src) %{
  match(Set dst (SubD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // minuend and result
    const XMMRegister rhs = $src$$XMMRegister;
    __ subsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17118
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - (LoadD src), with
// the load folded into the instruction's memory operand.
instruct subD_mem(regD dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // minuend and result
    __ subsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17130
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - con, with the
// constant addressed through $constantaddress.
instruct subD_imm(regD dst, immD con) %{
  match(Set dst (SubD dst con));
  predicate(UseAVX == 0);
  ins_cost(150);
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // minuend and result
    __ subsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17141
// Double subtract, AVX three-operand form (UseAVX > 0): dst = src1 - src2.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (SubD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vsubsd(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17153
// Double subtract, AVX form (UseAVX > 0): dst = src1 - (LoadD src2), with the
// load folded into the memory operand.
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (SubD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    __ vsubsd(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17165
// Double subtract, AVX form (UseAVX > 0): dst = src - con, with the constant
// addressed through $constantaddress.
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (SubD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src$$XMMRegister;
    __ vsubsd(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17177
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * src, both in
// registers.
instruct mulF_reg(regF dst, regF src) %{
  match(Set dst (MulF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    const XMMRegister rhs = $src$$XMMRegister;
    __ mulss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17189
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * (LoadF src), with
// the load folded into the instruction's memory operand.
instruct mulF_mem(regF dst, memory src) %{
  match(Set dst (MulF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    __ mulss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17201
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * con, with the
// constant addressed through $constantaddress.
instruct mulF_imm(regF dst, immF con) %{
  match(Set dst (MulF dst con));
  predicate(UseAVX == 0);
  ins_cost(150);
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    __ mulss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17212
// Float multiply, AVX three-operand form (UseAVX > 0): dst = src1 * src2.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (MulF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vmulss(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17224
// Float multiply, AVX form (UseAVX > 0): dst = src1 * (LoadF src2), with the
// load folded into the memory operand.
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (MulF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    __ vmulss(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17236
// Float multiply, AVX form (UseAVX > 0): dst = src * con, with the constant
// addressed through $constantaddress.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (MulF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src$$XMMRegister;
    __ vmulss(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17248
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * src, both in
// registers.
instruct mulD_reg(regD dst, regD src) %{
  match(Set dst (MulD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    const XMMRegister rhs = $src$$XMMRegister;
    __ mulsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17260
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * (LoadD src), with
// the load folded into the instruction's memory operand.
instruct mulD_mem(regD dst, memory src) %{
  match(Set dst (MulD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    __ mulsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17272
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * con, with the
// constant addressed through $constantaddress.
instruct mulD_imm(regD dst, immD con) %{
  match(Set dst (MulD dst con));
  predicate(UseAVX == 0);
  ins_cost(150);
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // input and result
    __ mulsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17283
// Double multiply, AVX three-operand form (UseAVX > 0): dst = src1 * src2.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (MulD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vmulsd(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17295
// Double multiply, AVX form (UseAVX > 0): dst = src1 * (LoadD src2), with the
// load folded into the memory operand.
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (MulD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    __ vmulsd(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17307
// Double multiply, AVX form (UseAVX > 0): dst = src * con, with the constant
// addressed through $constantaddress.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (MulD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src$$XMMRegister;
    __ vmulsd(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17319
// Float divide, non-AVX form (UseAVX == 0): dst = dst / src, both in
// registers.
instruct divF_reg(regF dst, regF src) %{
  match(Set dst (DivF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divss $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // dividend and result
    const XMMRegister rhs = $src$$XMMRegister;
    __ divss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17331
// Float divide, non-AVX form (UseAVX == 0): dst = dst / (LoadF src), with the
// load folded into the instruction's memory operand.
instruct divF_mem(regF dst, memory src) %{
  match(Set dst (DivF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divss $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // dividend and result
    __ divss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17343
// Float divide, non-AVX form (UseAVX == 0): dst = dst / con, with the
// constant addressed through $constantaddress.
instruct divF_imm(regF dst, immF con) %{
  match(Set dst (DivF dst con));
  predicate(UseAVX == 0);
  ins_cost(150);
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // dividend and result
    __ divss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17354
// Float divide, AVX three-operand form (UseAVX > 0): dst = src1 / src2.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (DivF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vdivss(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17366
// Float divide, AVX form (UseAVX > 0): dst = src1 / (LoadF src2), with the
// load folded into the memory operand.
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (DivF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    __ vdivss(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17378
// Float divide, AVX form (UseAVX > 0): dst = src / con, with the constant
// addressed through $constantaddress.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (DivF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src$$XMMRegister;
    __ vdivss(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17390
// Double divide, non-AVX form (UseAVX == 0): dst = dst / src, both in
// registers.
instruct divD_reg(regD dst, regD src) %{
  match(Set dst (DivD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divsd $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // dividend and result
    const XMMRegister rhs = $src$$XMMRegister;
    __ divsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17402
// Double divide, non-AVX form (UseAVX == 0): dst = dst / (LoadD src), with
// the load folded into the instruction's memory operand.
instruct divD_mem(regD dst, memory src) %{
  match(Set dst (DivD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divsd $dst, $src" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // dividend and result
    __ divsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17414
// Double divide, non-AVX form (UseAVX == 0): dst = dst / con, with the
// constant addressed through $constantaddress.
instruct divD_imm(regD dst, immD con) %{
  match(Set dst (DivD dst con));
  predicate(UseAVX == 0);
  ins_cost(150);
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // dividend and result
    __ divsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17425
// Double divide, AVX three-operand form (UseAVX > 0): dst = src1 / src2.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (DivD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister res = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vdivsd(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17437
// AVX scalar double division with the divisor loaded from memory:
// dst = src1 / LoadD(src2).
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (DivD src1 (LoadD src2)));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    __ vdivsd(quotient, dividend, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17449
// AVX scalar double division by a double immediate read from the
// constant table: dst = src / con.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (DivD src con));
  predicate(UseAVX > 0);
  ins_cost(150);

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivsd(quotient, dividend, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17461
// Non-AVX float absolute value, in place: ANDs dst with the float sign mask.
instruct absF_reg(regF dst) %{
  match(Set dst (AbsF dst));
  predicate(UseAVX == 0);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andps(value, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17472
// AVX float absolute value: dst = src AND float sign mask, using a
// 128-bit vandps.
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  match(Set dst (AbsF src));
  predicate(UseAVX > 0);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandps(result, input, ExternalAddress(float_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17485
// Non-AVX double absolute value, in place: ANDs dst with the double sign mask.
instruct absD_reg(regD dst) %{
  match(Set dst (AbsD dst));
  predicate(UseAVX == 0);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andpd(value, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17497
// AVX double absolute value: dst = src AND double sign mask, using a
// 128-bit vandpd.
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  match(Set dst (AbsD src));
  predicate(UseAVX > 0);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandpd(result, input, ExternalAddress(double_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17511
// Non-AVX float negation, in place: XORs dst with the float sign-flip constant.
instruct negF_reg(regF dst) %{
  match(Set dst (NegF dst));
  predicate(UseAVX == 0);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorps(value, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17522
// AVX float negation: dst = src with the sign bit flipped, via the
// vnegatess macro-assembler helper and the float sign-flip constant.
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  match(Set dst (NegF src));
  predicate(UseAVX > 0);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatess(result, input, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17534
// Non-AVX double negation, in place: XORs dst with the double sign-flip constant.
instruct negD_reg(regD dst) %{
  match(Set dst (NegD dst));
  predicate(UseAVX == 0);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorpd(value, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17546
// AVX double negation: dst = src with the sign bit flipped, via the
// vnegatesd macro-assembler helper and the double sign-flip constant.
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  match(Set dst (NegD src));
  predicate(UseAVX > 0);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatesd(result, input, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17559
// Scalar float square root, in place.
// For best performance sqrtss wants its destination register initialized
// beforehand, so the only rule provided is the one where the input already
// lives in dst.
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtss(value, value);
  %}
  ins_pipe(pipe_slow);
%}
17570
// Scalar double square root, in place.
// For best performance sqrtsd wants its destination register initialized
// beforehand, so the only rule provided is the one where the input already
// lives in dst.
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtsd(value, value);
  %}
  ins_pipe(pipe_slow);
%}
17581
// Float to half-float conversion into a general register, delegated to the
// flt_to_flt16 macro-assembler helper; tmp is a scratch XMM register.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    Register    half_bits = $dst$$Register;
    XMMRegister input     = $src$$XMMRegister;
    XMMRegister scratch   = $tmp$$XMMRegister;
    __ flt_to_flt16(half_bits, input, scratch);
  %}
  ins_pipe( pipe_slow );
%}
17592
// Float to half-float conversion fused with the StoreC of the result
// (AVX-512 with VL only). The conversion is issued as a masked 128-bit
// evcvtps2ph whose opmask is loaded with the value 1.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  match(Set mem (StoreC mem (ConvF2HF src)));
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register  scratch = $rtmp$$Register;
    KRegister opmask  = $ktmp$$KRegister;
    // Build the opmask value 0x1 through the scratch general register.
    __ movl(scratch, 0x1);
    __ kmovwl(opmask, scratch);
    __ evcvtps2ph($mem$$Address, opmask, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
17605
// Vector float to half-float conversion, register to register. The vector
// length encoding is derived from the source operand.
instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    int src_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph(result, input, 0x04, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
17615
// Vector float to half-float conversion fused with the vector store of the
// result. Only selected when the store covers at least 16 bytes.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  predicate(n->as_StoreVector()->memory_size() >= 16);
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    XMMRegister input = $src$$XMMRegister;
    int src_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, input, 0x04, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
17626
// Half-float (held in a general register) to float conversion, delegated to
// the flt16_to_flt macro-assembler helper.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    XMMRegister result    = $dst$$XMMRegister;
    Register    half_bits = $src$$Register;
    __ flt16_to_flt(result, half_bits);
  %}
  ins_pipe( pipe_slow );
%}
17635
// Vector half-float to float conversion fused with the vector load of the
// input. The vector length encoding is that of the result node.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    int dst_enc = vector_length_encoding(this);
    __ vcvtph2ps(result, $mem$$Address, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
17645
// Vector half-float to float conversion, register to register. The vector
// length encoding is that of the result node.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    int dst_enc = vector_length_encoding(this);
    __ vcvtph2ps(result, input, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
17656
17657 // ---------------------------------------- VectorReinterpret ------------------------------------
// Reinterpret of an opmask-typed vector mask whose lane count equals that of
// its input. The rule is in place (dst is also the source) and emits no code.
instruct reinterpret_mask(kReg dst) %{
  match(Set dst (VectorReinterpret dst));
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1)));
  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // Nothing to emit.
  %}
  ins_pipe( pipe_slow );
%}
17669
// Reinterpret of an opmask from T_SHORT lanes to T_BYTE lanes when the lane
// counts differ (AVX-512). The mask is expanded into a vector with evpmovm2w
// and then re-extracted per byte with evpmovb2m.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    // Both views must span the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);

    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2w(expanded, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, expanded, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
17689
// Reinterpret of an opmask from T_INT/T_FLOAT lanes to T_BYTE lanes when the
// lane counts differ (AVX-512). The mask is expanded into a vector with
// evpmovm2d and then re-extracted per byte with evpmovb2m.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    // Both views must span the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);

    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2d(expanded, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, expanded, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
17710
// Reinterpret of an opmask from T_LONG/T_DOUBLE lanes to T_BYTE lanes when the
// lane counts differ (AVX-512). The mask is expanded into a vector with
// evpmovm2q and then re-extracted per byte with evpmovb2m.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    // Both views must span the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);

    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2q(expanded, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, expanded, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
17731
// Reinterpret of a non-mask vector whose byte size equals that of its input.
// The rule is in place (dst is also the source) and emits no code.
instruct reinterpret(vec dst) %{
  match(Set dst (VectorReinterpret dst));
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // Nothing to emit.
  %}
  ins_pipe( pipe_slow );
%}
17743
// Non-AVX widening reinterpret (source smaller than destination).
// dst is first loaded with a 32-bit or 64-bit mask constant, chosen by the
// source size, and then ANDed with src.
instruct reinterpret_expand(vec dst, vec src) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    XMMRegister widened = $dst$$XMMRegister;
    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes != 4) {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu(widened, ExternalAddress(vector_64_bit_mask()), noreg);
    } else {
      __ movdqu(widened, ExternalAddress(vector_32_bit_mask()), noreg);
    }
    __ pand(widened, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
17766
// AVX widening reinterpret of a non-mask vector from a 4-byte source:
// dst = src AND the 32-bit mask constant.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    __ vpand(widened, narrow, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}
17780
17781
// AVX widening reinterpret of a non-mask vector from a source larger than
// 4 bytes: a register move whose width is the source size (8, 16 or 32 bytes).
instruct vreinterpret_expand(legVec dst, vec src) %{
  match(Set dst (VectorReinterpret src));
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes == 8) {
      __ movq(widened, narrow);
    } else if (src_bytes == 16) {
      __ movdqu(widened, narrow);
    } else if (src_bytes == 32) {
      __ vmovdqu(widened, narrow);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17800
// Narrowing reinterpret of a non-mask vector (source larger than destination):
// a register move whose width is the destination size (4, 8, 16 or 32 bytes).
instruct reinterpret_shrink(vec dst, legVec src) %{
  match(Set dst (VectorReinterpret src));
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n));
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister narrow = $dst$$XMMRegister;
    XMMRegister wide   = $src$$XMMRegister;
    int dst_bytes = Matcher::vector_length_in_bytes(this);
    if (dst_bytes == 4) {
      __ movfltz(narrow, wide);
    } else if (dst_bytes == 8) {
      __ movq(narrow, wide);
    } else if (dst_bytes == 16) {
      __ movdqu(narrow, wide);
    } else if (dst_bytes == 32) {
      __ vmovdqu(narrow, wide);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17818
17819 // ----------------------------------------------------------------------------------------------------
17820
// Scalar double rounding with an immediate rounding mode, via roundsd.
// On the non-AVX path dst is zeroed first when it is a different register
// from src.
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  ins_cost(150);
  format %{ "roundsd $dst,$src" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    if (UseAVX == 0 && rounded != input) {
      __ pxor(rounded, rounded);
    }
    __ roundsd(rounded, input, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
17834
// Scalar double rounding of a double immediate read from the constant table,
// with an immediate rounding mode.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  ins_cost(150);
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    __ roundsd(rounded, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
17845
// Packed double rounding for vectors of fewer than 8 lanes, register source,
// via vroundpd.
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV src rmode));
  predicate(Matcher::vector_length(n) < 8);
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    int dst_enc = vector_length_encoding(this);
    __ vroundpd(rounded, input, $rmode$$constant, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
17857
// Packed double rounding for 8-lane vectors, register source, via a 512-bit
// vrndscalepd.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV src rmode));
  predicate(Matcher::vector_length(n) == 8);
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    __ vrndscalepd(rounded, input, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
17868
// Packed double rounding for vectors of fewer than 8 lanes with the input
// loaded from memory, via vroundpd.
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  predicate(Matcher::vector_length(n) < 8);
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    int dst_enc = vector_length_encoding(this);
    __ vroundpd(rounded, $mem$$Address, $rmode$$constant, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
17880
// Packed double rounding for 8-lane vectors with the input loaded from
// memory, via a 512-bit vrndscalepd.
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  predicate(Matcher::vector_length(n) == 8);
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    __ vrndscalepd(rounded, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
17891
// Spin-wait hint: the OnSpinWait node is emitted as a single pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_cost(200);
  ins_encode %{
    // One pause per OnSpinWait node.
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
17905
// Double fused multiply-add, accumulating in place: c = a * b + c.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));
  ins_cost(150);
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accum = $c$$XMMRegister;
    XMMRegister lhs   = $a$$XMMRegister;
    XMMRegister rhs   = $b$$XMMRegister;
    // The accumulator is passed as both destination and addend.
    __ fmad(accum, lhs, rhs, accum);
  %}
  ins_pipe( pipe_slow );
%}
17917
// Float fused multiply-add, accumulating in place: c = a * b + c.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));
  ins_cost(150);
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accum = $c$$XMMRegister;
    XMMRegister lhs   = $a$$XMMRegister;
    XMMRegister rhs   = $b$$XMMRegister;
    // The accumulator is passed as both destination and addend.
    __ fmaf(accum, lhs, rhs, accum);
  %}
  ins_pipe( pipe_slow );
%}
17929
17930 // ====================VECTOR INSTRUCTIONS=====================================
17931
17932 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder move from the vec operand class to legVec. It has an empty
// format and its encoding must never execute.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);

  format %{ "" %}
  ins_encode %{
    // Reaching this encoding is an error.
    ShouldNotReachHere();
  %}

  ins_pipe( fpu_reg_reg );
%}
17941
// Placeholder move from the legVec operand class to vec. It has an empty
// format and its encoding must never execute.
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);

  format %{ "" %}
  ins_encode %{
    // Reaching this encoding is an error.
    ShouldNotReachHere();
  %}

  ins_pipe( fpu_reg_reg );
%}
17950
17951 // ============================================================================
17952
// Generic vector load: handles every LoadVector by delegating to the
// load_vector macro-assembler helper with the element type and the vector
// size in bytes of the result.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  format %{ "load_vector $dst,$mem" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister loaded = $dst$$XMMRegister;
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    __ load_vector(elem_type, loaded, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}
17964
// Generic vector store: handles every StoreVector, picking the store
// instruction from the byte size of the source vector (4, 8, 16, 32 or 64).
// The format string no longer ends in a stray "\n\t", which added an empty
// indented line after the instruction in printed output.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      // 64-byte vectors are stored with an explicit 512-bit encoding.
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17982
17983 // ---------------------------------------- Gather ------------------------------------
17984
17985 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
17986
// Unmasked gather of non-subword elements for vectors of at most 32 bytes
// when AVX512VL is not available. The vector mask temp is filled by comparing
// it with itself for equality, and the base address is materialized in tmp
// before the vgather helper is called.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  match(Set dst (LoadVectorGather mem idx));
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    int vlen_enc = vector_length_encoding(this);

    XMMRegister lane_mask = $mask$$XMMRegister;
    Register    base      = $tmp$$Register;
    __ vpcmpeqd(lane_mask, lane_mask, lane_mask, vlen_enc);
    __ lea(base, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, base, $idx$$XMMRegister, lane_mask, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18003
18004
// Unmasked gather of non-subword elements using an opmask register; selected
// when AVX512VL is available or the vector is 64 bytes long.
// The format string now substitutes the ktmp operand; it previously printed
// the literal text "ktmp" because the operand sigil was missing.
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // XNOR of the temp opmask with itself sets its bits, so no lane is masked off.
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    // Materialize the base address for the gather helper.
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18020
// Masked gather of non-subword elements using an opmask register; selected
// when AVX512VL is available or the vector is 64 bytes long.
// The format string now substitutes the ktmp operand; it previously printed
// the literal text "ktmp" because the operand sigil was missing.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary first.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Zero the destination before the gather.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18041
// Unmasked gather of subword elements for vectors of at most 8 bytes.
// The base address is materialized in tmp and the work is delegated to the
// vgather8b helper; idx_base is a pointer register operand.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  match(Set dst (LoadVectorGather mem idx_base));
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    int dst_enc = vector_length_encoding(this);

    Register base    = $tmp$$Register;
    Register indices = $idx_base$$Register;
    Register scratch = $rtmp$$Register;
    __ lea(base, $mem$$Address);
    __ vgather8b(elem_type, $dst$$XMMRegister, base, indices, scratch, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
18055
// Unmasked gather of subword elements for vectors larger than 8 bytes.
// idx_base is copied into a temp before being handed to the vgather_subword
// helper; the two mask-related register arguments of the helper are noreg.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  match(Set dst (LoadVectorGather mem idx_base));
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    int lane_count = Matcher::vector_length(this);
    int dst_enc = vector_length_encoding(this);

    Register base         = $tmp$$Register;
    Register indices_copy = $idx_base_temp$$Register;
    __ lea(base, $mem$$Address);
    __ movptr(indices_copy, $idx_base$$Register);
    __ vgather_subword(elem_type, $dst$$XMMRegister, base, indices_copy, noreg,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, noreg, $length$$Register, lane_count, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
18073
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512BW is available. mask_idx is zeroed, the opmask is copied into a
// general register, and the work is delegated to vgather8b_masked.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    int dst_enc = vector_length_encoding(this);

    Register mask_pos  = $mask_idx$$Register;
    Register base      = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ xorq(mask_pos, mask_pos);
    __ lea(base, $mem$$Address);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather8b_masked(elem_type, $dst$$XMMRegister, base, $idx_base$$Register, mask_bits, mask_pos, $rtmp$$Register, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
18089
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512BW is available. mask_idx is zeroed, idx_base is copied into a temp,
// the opmask is copied into a general register, and the work is delegated to
// vgather_subword.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    int lane_count = Matcher::vector_length(this);
    int dst_enc = vector_length_encoding(this);

    Register mask_pos     = $mask_idx$$Register;
    Register base         = $tmp$$Register;
    Register indices_copy = $idx_base_temp$$Register;
    Register mask_bits    = $rtmp2$$Register;
    __ xorq(mask_pos, mask_pos);
    __ lea(base, $mem$$Address);
    __ movptr(indices_copy, $idx_base$$Register);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather_subword(elem_type, $dst$$XMMRegister, base, indices_copy, mask_bits,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, mask_pos, $length$$Register, lane_count, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
18109
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512VL+BW is not available; the mask is a vector register.
// The mask bits are extracted with vpmovmskb; for T_SHORT they are then
// compacted with pext using the 0x55555555 selector. mask_idx serves as the
// selector scratch first and is zeroed before the vgather8b_masked call.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    int dst_enc = vector_length_encoding(this);

    Register base      = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    Register mask_pos  = $mask_idx$$Register;
    __ lea(base, $mem$$Address);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, dst_enc);
    if (elem_type == T_SHORT) {
      __ movl(mask_pos, 0x55555555);
      __ pextl(mask_bits, mask_bits, mask_pos);
    }
    __ xorl(mask_pos, mask_pos);
    __ vgather8b_masked(elem_type, $dst$$XMMRegister, base, $idx_base$$Register, mask_bits, mask_pos, $rtmp$$Register, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
18129
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512VL+BW is not available; the mask is a vector register.
// The mask bits are extracted with vpmovmskb; for T_SHORT they are then
// compacted with pext using the 0x55555555 selector. mask_idx serves as the
// selector scratch first and is zeroed before the vgather_subword call.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    int lane_count = Matcher::vector_length(this);
    int dst_enc = vector_length_encoding(this);

    Register base         = $tmp$$Register;
    Register indices_copy = $idx_base_temp$$Register;
    Register mask_bits    = $rtmp2$$Register;
    Register mask_pos     = $mask_idx$$Register;
    __ lea(base, $mem$$Address);
    __ movptr(indices_copy, $idx_base$$Register);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, dst_enc);
    if (elem_type == T_SHORT) {
      __ movl(mask_pos, 0x55555555);
      __ pextl(mask_bits, mask_bits, mask_pos);
    }
    __ xorl(mask_pos, mask_pos);
    __ vgather_subword(elem_type, $dst$$XMMRegister, base, indices_copy, mask_bits,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, mask_pos, $length$$Register, lane_count, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
18153
18154 // ====================Scatter=======================================
18155
18156 // Scatter INT, LONG, FLOAT, DOUBLE
18157
// Unmasked scatter of non-subword elements (AVX-512 only).
// The format string now names the allocated ktmp operand; it previously
// hard-coded "k2", which need not be the opmask register actually assigned.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    // Load the temp opmask from the all-bits-set constant.
    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    // Materialize the base address for the scatter helper.
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18176
// Masked vector scatter store (StoreVectorScatterMasked) predicated by an opmask operand.
// The asserts below restrict it to non-subword element types and to source vectors
// of at least 16 bytes. tmp receives the effective address of mem; ktmp receives a
// copy of the mask so the mask operand itself is not modified.
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18194
18195 // ====================REPLICATE=======================================
18196
// Replicate a byte scalar held in a general register into every lane of a vector.
// Three code shapes are emitted: a pre-AVX2 shuffle sequence, an EVEX broadcast
// straight from the general register, or a move followed by an AVX2 broadcast.
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    Register src_reg = $src$$Register;
    if (UseAVX < 2) {
      // No broadcast instruction is used here: widen the byte step by step.
      __ movdl(dst_reg, src_reg);
      __ punpcklbw(dst_reg, dst_reg);
      __ pshuflw(dst_reg, dst_reg, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq(dst_reg, dst_reg);
      }
    } else {
      const int enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb(dst_reg, src_reg, enc);
      } else {
        __ movdl(dst_reg, src_reg);
        __ vpbroadcastb(dst_reg, dst_reg, enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18226
// Replicate a byte loaded from memory into every lane of a vector (UseAVX >= 2 only).
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;
    // The broadcast reads its source operand directly from memory.
    __ vpbroadcastb(dst_reg, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18237
18238 // ====================ReplicateS=======================================
18239
// Replicate a short scalar held in a general register into every lane of a vector.
// Three code shapes are emitted: a pre-AVX2 shuffle sequence, an EVEX broadcast
// straight from the general register, or a move followed by an AVX2 broadcast.
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    Register src_reg = $src$$Register;
    if (UseAVX < 2) {
      // Splat the low word across the low quadword, then duplicate it if needed.
      __ movdl(dst_reg, src_reg);
      __ pshuflw(dst_reg, dst_reg, 0x00);
      if (vlen >= 8) {
        assert(vlen == 8, "");
        __ punpcklqdq(dst_reg, dst_reg);
      }
    } else if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
      assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
      __ evpbroadcastw(dst_reg, src_reg, enc);
    } else {
      __ movdl(dst_reg, src_reg);
      __ vpbroadcastw(dst_reg, dst_reg, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18267
// Replicate an immH constant into every 16-bit lane of a vector.
// The constant is first materialised in the general-purpose temp rtmp and then
// broadcast from there. The assert requires AVX512-FP16 and a T_SHORT element type.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
    const int enc = vector_length_encoding(this);
    Register scratch = $rtmp$$Register;
    __ movl(scratch, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18281
// Replicate a scalar held in a float register into every 16-bit lane of a vector.
// Requires AVX512-FP16 and a T_SHORT element type (see the predicate). The value is
// moved through the general-purpose temp rtmp before being broadcast.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    Register scratch = $rtmp$$Register;
    __ evmovw(scratch, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18294
// Replicate a short loaded from memory into every lane of a vector (UseAVX >= 2 only).
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;
    // The broadcast reads its source operand directly from memory.
    __ vpbroadcastw(dst_reg, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18305
18306 // ====================ReplicateI=======================================
18307
// Replicate an int scalar held in a general register into every lane of a vector.
// Uses an EVEX broadcast straight from the general register when available;
// otherwise the scalar is moved into dst and splatted with AVX2 or SSE.
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    Register src_reg = $src$$Register;
    if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastd(dst_reg, src_reg, enc);
    } else {
      // Both fallbacks start by moving the scalar into dst.
      __ movdl(dst_reg, src_reg);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastd(dst_reg, dst_reg, enc);
      } else {
        __ pshufd(dst_reg, dst_reg, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18327
// Replicate an int loaded from memory into every lane of a vector.
// The instruction is chosen by CPU capability: AVX2 integer broadcast, AVX
// single-precision broadcast, or an SSE load followed by a shuffle.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd(dst_reg, $mem$$Address, enc);
    } else if (VM_Version::supports_avx()) {
      __ vbroadcastss(dst_reg, $mem$$Address, enc);
    } else {
      // Load the scalar into dst, then splat it.
      __ movdl(dst_reg, $mem$$Address);
      __ pshufd(dst_reg, dst_reg, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}
18345
// Replicate an int immediate into a non-long integral vector by loading a
// pre-replicated constant from the constant table. The constant entry is sized
// at 4, 8 or 16 bytes depending on AVX / SSE3 support, expressed in elements.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constant-address expression is kept self-contained (no
    // locals), on the understanding that ADLC also evaluates it outside this scope.
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                type2aelembytes(Matcher::vector_element_basic_type(this))));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    __ load_constant_vector(elem_type, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18360
// Replicate scalar zero into a non-long integral vector by XOR-ing dst with itself.
// The vector-length-aware vpxor form is used only on EVEX hardware without AVX512VL;
// every other configuration uses plain pxor.
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor(dst_reg, dst_reg);
    } else {
      __ vpxor(dst_reg, dst_reg, dst_reg, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18376
// Replicate the immediate -1 into a non-long integral vector via the vallones helper.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;
    __ vallones(dst_reg, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18387
18388 // ====================ReplicateL=======================================
18389
// Replicate a long (8 byte) scalar held in a general register into every lane.
// Uses an EVEX broadcast straight from the general register when available;
// otherwise the scalar is moved into dst and duplicated with AVX2 or SSE.
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    const int vlen = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    Register src_reg = $src$$Register;
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq(dst_reg, src_reg, enc);
    } else {
      // Both fallbacks start by moving the scalar into dst.
      __ movdq(dst_reg, src_reg);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastq(dst_reg, dst_reg, enc);
      } else {
        __ punpcklqdq(dst_reg, dst_reg);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18410
// Replicate a long loaded from memory into every lane of a vector.
// The instruction is chosen by CPU capability: AVX2 broadcast, SSE3 movddup,
// or a plain load followed by a quadword unpack.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq(dst_reg, $mem$$Address, enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup(dst_reg, $mem$$Address);
    } else {
      // Load the scalar into dst, then duplicate it.
      __ movq(dst_reg, $mem$$Address);
      __ punpcklqdq(dst_reg, dst_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
18428
// Replicate a long (8 byte) immediate into a vector by loading a pre-replicated
// constant from the constant table. The table entry holds one element when SSE3
// is supported and two otherwise.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constant-address expression is kept self-contained (no
    // locals), on the understanding that ADLC also evaluates it outside this scope.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18441
// Replicate a long zero into a vector by XOR-ing dst with itself.
// The vector-length-aware vpxor form is used only on EVEX hardware without AVX512VL;
// every other configuration uses plain pxor.
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor(dst_reg, dst_reg);
    } else {
      __ vpxor(dst_reg, dst_reg, dst_reg, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18456
// Replicate the long immediate -1 into a vector via the vallones helper.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;
    __ vallones(dst_reg, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18467
18468 // ====================ReplicateF=======================================
18469
// Replicate a float scalar held in a vector register into every lane (UseAVX > 0).
// Up to four lanes are filled with a 128-bit permute. Wider vectors use a
// register broadcast on AVX2, or a permute plus a copy of the low 128 bits into
// the high half otherwise.
instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    if (vlen <= 4) {
      __ vpermilps(dst_reg, src_reg, 0x00, Assembler::AVX_128bit);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastss(dst_reg, src_reg, enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 8, "sanity");
      __ vpermilps(dst_reg, src_reg, 0x00, Assembler::AVX_128bit);
      __ vinsertf128_high(dst_reg, dst_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
18489
// Replicate a float scalar held in a vector register when AVX is disabled (UseAVX == 0).
instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    // Shuffle selector 0x00 picks element 0 of the source for every dword of dst.
    __ pshufd(dst_reg, src_reg, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
18499
// Replicate a float loaded from memory into every lane of a vector (UseAVX > 0).
instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;
    // The broadcast reads its source operand directly from memory.
    __ vbroadcastss(dst_reg, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18510
// Replicate a float immediate into a vector by loading a pre-replicated constant
// from the constant table. The table entry holds 1, 2 or 4 elements depending on
// AVX / SSE3 support.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constant-address expression is kept self-contained (no
    // locals), on the understanding that ADLC also evaluates it outside this scope.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18524
// Replicate a float zero into a vector by XOR-ing dst with itself.
// The vector-length-aware vpxor form is used only on EVEX hardware without
// AVX512VL+DQ; every other configuration uses xorps.
instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vldq()) {
      __ xorps(dst_reg, dst_reg);
    } else {
      __ vpxor(dst_reg, dst_reg, dst_reg, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18539
18540 // ====================ReplicateD=======================================
18541
// Replicate a double (8 byte) scalar held in a vector register (UseSSE >= 3).
// Up to two lanes are filled with movddup. Wider vectors use a register broadcast
// on AVX2, or movddup plus a copy of the low 128 bits into the high half otherwise.
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    if (vlen <= 2) {
      __ movddup(dst_reg, src_reg);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd(dst_reg, src_reg, enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ movddup(dst_reg, src_reg);
      __ vinsertf128_high(dst_reg, dst_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
18562
// Replicate a double scalar held in a vector register when SSE3 is unavailable (UseSSE < 3).
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    // Shuffle selector 0x44 picks dwords 0,1,0,1, i.e. the low quadword twice.
    __ pshufd(dst_reg, src_reg, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
18572
// Replicate a double loaded from memory into every lane of a vector (UseSSE >= 3).
// Vectors of fewer than four lanes use movddup; wider ones use vbroadcastsd.
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;
    if (Matcher::vector_length(this) < 4) {
      __ movddup(dst_reg, $mem$$Address);
    } else {
      const int enc = vector_length_encoding(this);
      __ vbroadcastsd(dst_reg, $mem$$Address, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18587
// Replicate a double (8 byte) immediate into a vector by loading a pre-replicated
// constant from the constant table. The table entry holds one element when SSE3
// is supported and two otherwise.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constant-address expression is kept self-contained (no
    // locals), on the understanding that ADLC also evaluates it outside this scope.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18600
// Replicate a double zero into a vector by XOR-ing dst with itself.
// The vector-length-aware vpxor form is used only on EVEX hardware without
// AVX512VL+DQ; every other configuration uses xorps.
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_reg = $dst$$XMMRegister;
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vldq()) {
      __ xorps(dst_reg, dst_reg);
    } else {
      __ vpxor(dst_reg, dst_reg, dst_reg, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18615
18616 // ====================VECTOR INSERT=======================================
18617
// Insert an integral scalar from a general register into lane idx of a vector
// narrower than 32 bytes, updating dst in place. The asserts require SSE4, a
// vector of at least 8 bytes, an integral element type and an in-range index.
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    XMMRegister dst_reg = $dst$$XMMRegister;
    Register val_reg = $val$$Register;
    __ insert(elem_bt, dst_reg, val_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18635
// Insert an integral scalar into lane idx of a 32-byte vector.
// The index is split into a 128-bit lane number (y_idx) and a position within
// that lane (x_idx). The selected lane is extracted into vtmp, patched with val,
// and written back into a copy of src placed in dst.
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Number of elements that fit in one 16-byte lane, and its base-2 logarithm.
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);  // position inside the lane
    uint y_idx = ($idx$$constant >> log2epr) & 1;         // which of the two lanes
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18658
// Insert an integral scalar into lane idx of a 64-byte vector (asserts UseAVX > 2).
// The index is split into a 128-bit lane number and a position within that lane.
// The selected lane is extracted into vtmp, patched with val, and written back
// into a copy of src placed in dst.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Elements per 16-byte lane determine how many index bits address within a lane.
    const int lane_shift = log2(16/type2aelembytes(elem_bt));
    const uint pos_in_lane = $idx$$constant & right_n_bits(lane_shift);
    const uint lane_no = ($idx$$constant >> lane_shift) & 3;

    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    XMMRegister lane_reg = $vtmp$$XMMRegister;
    __ vextracti32x4(lane_reg, src_reg, lane_no);
    __ vinsert(elem_bt, lane_reg, lane_reg, $val$$Register, pos_in_lane);
    __ vinserti32x4(dst_reg, src_reg, lane_reg, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
18682
// Insert a long scalar from a general register into lane idx of a two-element
// vector, updating dst in place. The asserts require SSE4, a T_LONG element type
// and an in-range index.
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    XMMRegister dst_reg = $dst$$XMMRegister;
    Register val_reg = $val$$Register;
    __ pinsrq(dst_reg, val_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18696
// Insert a long scalar into lane idx of a four-element vector.
// Bit 0 of the index selects the quadword inside a 128-bit lane (x_idx) and bit 1
// selects the lane (y_idx). The selected lane is extracted into vtmp, patched with
// val, and written back into a copy of src placed in dst.
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);  // quadword inside the lane
    uint y_idx = ($idx$$constant >> 1) & 1;         // which of the two lanes
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18715
// Insert a long scalar into lane idx of an eight-element vector.
// Bit 0 of the index selects the quadword inside a 128-bit lane and bits 1-2
// select the lane. The selected lane is extracted into vtmp, patched with val,
// and written back into a copy of src placed in dst.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const uint pos_in_lane = $idx$$constant & right_n_bits(1);
    const uint lane_no = ($idx$$constant >> 1) & 3;

    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    XMMRegister lane_reg = $vtmp$$XMMRegister;
    __ vextracti32x4(lane_reg, src_reg, lane_no);
    __ vpinsrq(lane_reg, lane_reg, $val$$Register, pos_in_lane);
    __ vinserti32x4(dst_reg, src_reg, lane_reg, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
18733
// Insert a float scalar into lane idx of a vector with fewer than eight elements,
// updating dst in place. The asserts require SSE4, a T_FLOAT element type and an
// in-range index.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Only the low two index bits are used; they are shifted into bits 4-5 of
    // the insertps immediate.
    const uint lane_pos = $idx$$constant & right_n_bits(2);
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister val_reg = $val$$XMMRegister;
    __ insertps(dst_reg, val_reg, lane_pos << 4);
  %}
  ins_pipe( pipe_slow );
%}
18749
// Insert a float scalar into lane idx of a vector with eight or sixteen elements.
// The low two index bits give the position inside a 128-bit lane (x_idx) and the
// remaining bits select the lane (y_idx). The selected lane is extracted into vtmp,
// patched with val, and written back into a copy of src placed in dst.
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      // 256-bit vector: two 128-bit lanes.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      // 512-bit vector: four 128-bit lanes.
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
18777
// Insert a double scalar into lane idx of a two-element vector, updating dst in
// place. The value is first moved into the general-purpose temp tmp and inserted
// from there. The asserts require SSE4, a T_DOUBLE element type and an in-range index.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    Register bits_reg = $tmp$$Register;
    __ movq(bits_reg, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, bits_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18793
// Insert a double scalar into lane idx of a four-element vector.
// The value is moved into the general-purpose temp tmp. Bit 0 of the index selects
// the quadword inside a 128-bit lane (x_idx) and bit 1 selects the lane (y_idx).
// The selected lane is extracted into vtmp, patched, and written back into a copy
// of src placed in dst.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);  // quadword inside the lane
    uint y_idx = ($idx$$constant >> 1) & 1;         // which of the two lanes
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18813
// Insert a double scalar into lane idx of an eight-element vector.
// The value is moved into the general-purpose temp tmp. Bit 0 of the index selects
// the quadword inside a 128-bit lane (x_idx) and bits 1-2 select the lane (y_idx).
// The selected lane is extracted into vtmp, patched, and written back into a copy
// of src placed in dst.
// NOTE(review): idx is declared immI here while the sibling insert rules use
// immU8; left unchanged because it affects matching - confirm whether intended.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);  // quadword inside the lane
    uint y_idx = ($idx$$constant >> 1) & 3;         // which of the four lanes
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18832
18833 // ====================REDUCTION ARITHMETIC=======================================
18834
18835 // =======================Int Reduction==========================================
18836
// Reduce an int vector (src2) together with a scalar input (src1) into a scalar
// result. One rule covers add, mul, and, or, xor, signed and unsigned min/max;
// the ideal opcode is forwarded to the reduceI macro-assembler helper, which
// uses two vector temps.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst (OrReductionV   src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_lanes = Matcher::vector_length(this, $src2);
    Register result = $dst$$Register;
    Register scalar_in = $src1$$Register;
    XMMRegister vector_in = $src2$$XMMRegister;
    __ reduceI(this->ideal_Opcode(), num_lanes, result, scalar_in, vector_in,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18857
18858 // =======================Long Reduction==========================================
18859
// Reduce a long vector (src2) together with a scalar input (src1) into a scalar
// result, for CPUs without AVX512DQ. The vector operands use the legVec class.
// The ideal opcode is forwarded to the reduceL macro-assembler helper.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst (OrReductionV   src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_lanes = Matcher::vector_length(this, $src2);
    Register result = $dst$$Register;
    Register scalar_in = $src1$$Register;
    XMMRegister vector_in = $src2$$XMMRegister;
    __ reduceL(this->ideal_Opcode(), num_lanes, result, scalar_in, vector_in,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18880
// Reduce a long vector (src2) together with a scalar input (src1) into a scalar
// result, for CPUs with AVX512DQ. Same encoding as the non-DQ rule, but the
// vector operands use the vec class instead of legVec.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst (OrReductionV   src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_lanes = Matcher::vector_length(this, $src2);
    Register result = $dst$$Register;
    Register scalar_in = $src1$$Register;
    XMMRegister vector_in = $src2$$XMMRegister;
    __ reduceL(this->ideal_Opcode(), num_lanes, result, scalar_in, vector_in,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18901
18902 // =======================Float Reduction==========================================
18903
// Strictly ordered float add/mul reduction for source vectors of at most four
// elements. dst is both the scalar input and the result; one vector temp is used.
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int num_lanes = Matcher::vector_length(this, $src);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister vector_in = $src$$XMMRegister;
    __ reduce_fp(this->ideal_Opcode(), num_lanes, acc, vector_in, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18917
// Strictly ordered float add/mul reduction for 8-element vectors. dst is both
// the first match input and the result; dst, vtmp1 and vtmp2 are TEMP. Code is
// emitted through the two-temporary form of reduce_fp.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8 &&
            n->as_Reduction()->requires_strict_order()); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18931
// Strictly ordered float add/mul reduction for 16-element vectors. The vector
// operands use the legVec operand class. dst is both the first match input and
// the result; dst, vtmp1 and vtmp2 are TEMP. Code is emitted through reduce_fp.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 16 &&
            n->as_Reduction()->requires_strict_order()); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18945
18946
// Float add/mul reduction without the strict-ordering requirement, for
// 2-element vectors. Intended for the VectorAPI, which permits non-strictly
// ordered add/mul reductions. src1 holds the reduction identity and is not
// passed to unordered_reduce_fp; only dst and src2 are.
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  predicate(Matcher::vector_length(n->in(2)) == 2 &&
            !n->as_Reduction()->requires_strict_order()); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18963
// Float add/mul reduction without the strict-ordering requirement, for
// 4-element vectors. Intended for the VectorAPI, which permits non-strictly
// ordered add/mul reductions. src1 holds the reduction identity and is not
// passed to unordered_reduce_fp; dst and vtmp are TEMP.
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  predicate(Matcher::vector_length(n->in(2)) == 4 &&
            !n->as_Reduction()->requires_strict_order()); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18980
// Float add/mul reduction without the strict-ordering requirement, for
// 8-element vectors. Intended for the VectorAPI, which permits non-strictly
// ordered add/mul reductions. src1 holds the reduction identity and is not
// passed to unordered_reduce_fp; dst, vtmp1 and vtmp2 are TEMP.
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8 &&
            !n->as_Reduction()->requires_strict_order()); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18997
// Float add/mul reduction without the strict-ordering requirement, for
// 16-element vectors; vector operands use the legVec operand class. Intended
// for the VectorAPI, which permits non-strictly ordered add/mul reductions.
// src1 holds the reduction identity and is not passed to unordered_reduce_fp.
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 16 &&
            !n->as_Reduction()->requires_strict_order()); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19014
19015 // =======================Double Reduction==========================================
19016
// Strictly ordered double add/mul reduction for 2-element vectors. dst is both
// the first match input and the result; dst and vtmp are TEMP. Code is emitted
// through reduce_fp.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_length(n->in(2)) == 2 &&
            n->as_Reduction()->requires_strict_order()); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19030
// Strictly ordered double add/mul reduction for 4-element vectors. dst is both
// the first match input and the result; dst, vtmp1 and vtmp2 are TEMP. Code is
// emitted through the two-temporary form of reduce_fp.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 4 &&
            n->as_Reduction()->requires_strict_order()); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19044
// Strictly ordered double add/mul reduction for 8-element vectors; vector
// operands use the legVec operand class. dst is both the first match input and
// the result; dst, vtmp1 and vtmp2 are TEMP. Code is emitted through reduce_fp.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8 &&
            n->as_Reduction()->requires_strict_order()); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19058
// Double add/mul reduction without the strict-ordering requirement, for
// 2-element vectors. Intended for the VectorAPI, which permits non-strictly
// ordered add/mul reductions. src1 holds the reduction identity and is not
// passed to unordered_reduce_fp; only dst and src2 are.
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  predicate(Matcher::vector_length(n->in(2)) == 2 &&
            !n->as_Reduction()->requires_strict_order()); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19075
// Double add/mul reduction without the strict-ordering requirement, for
// 4-element vectors. Intended for the VectorAPI, which permits non-strictly
// ordered add/mul reductions. src1 holds the reduction identity and is not
// passed to unordered_reduce_fp; dst and vtmp are TEMP.
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  predicate(Matcher::vector_length(n->in(2)) == 4 &&
            !n->as_Reduction()->requires_strict_order()); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19092
// Double add/mul reduction without the strict-ordering requirement, for
// 8-element vectors; vector operands use the legVec operand class. Intended
// for the VectorAPI, which permits non-strictly ordered add/mul reductions.
// src1 holds the reduction identity and is not passed to unordered_reduce_fp.
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8 &&
            !n->as_Reduction()->requires_strict_order()); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19109
19110 // =======================Byte Reduction==========================================
19111
// Byte reduction rule used when VM_Version::supports_avx512bw() is false and
// the vector input, in(2), has T_BYTE elements. Vector operands use the legVec
// operand class. It matches the Add/And/Or/Xor/Min/Max/UMin/UMax reduction
// nodes listed below and emits code through reduceB with vtmp1/vtmp2 as TEMP.
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(!VM_Version::supports_avx512bw() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_BYTE);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19131
// Byte reduction rule used when VM_Version::supports_avx512bw() is true and
// the vector input, in(2), has T_BYTE elements. Vector operands use the vec
// operand class. It matches the Add/And/Or/Xor/Min/Max/UMin/UMax reduction
// nodes listed below and emits code through reduceB with vtmp1/vtmp2 as TEMP.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(VM_Version::supports_avx512bw() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_BYTE);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19151
19152 // =======================Short Reduction==========================================
19153
// Short reduction rule, applied when the vector input, in(2), has T_SHORT
// elements. Vector operands use the legVec operand class. It matches the
// Add/Mul/And/Or/Xor/Min/Max/UMin/UMax reduction nodes listed below and emits
// code through reduceS with vtmp1/vtmp2 as TEMP.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // in(2) is src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceS(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19174
// =======================Byte Mul Reduction==========================================
19176
// Byte multiply reduction for vectors of at most 32 elements. Applies when the
// vector input, in(2), has T_BYTE elements. dst, vtmp1 and vtmp2 are TEMP, and
// code is emitted through mulreduceB.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) <= 32 &&
            Matcher::vector_element_basic_type(n->in(2)) == T_BYTE); // in(2) is src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ mulreduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19190
// Byte multiply reduction for 64-element vectors; vector operands use the
// legVec operand class. Applies when the vector input, in(2), has T_BYTE
// elements. dst, vtmp1 and vtmp2 are TEMP, and code is emitted through
// mulreduceB.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 64 &&
            Matcher::vector_element_basic_type(n->in(2)) == T_BYTE); // in(2) is src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ mulreduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19204
19205 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Float min/max reduction of a 2-element vector when AVX10.2 is not supported.
// src1 is an immediate and must be the matching constant: TypeF::POS_INF for
// MinReductionV, TypeF::NEG_INF for MaxReductionV. src1 is not passed to
// reduceFloatMinMax, and the third argument of that call is false.
// NOTE(review): the boolean presumably says whether dst carries an incoming
// value - confirm against reduceFloatMinMax.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19227
// Float min/max reduction of vectors with at least 4 elements when AVX10.2 is
// not supported. src1 is an immediate and must be the matching constant:
// TypeF::POS_INF for MinReductionV, TypeF::NEG_INF for MaxReductionV. src1 is
// not passed to reduceFloatMinMax, and the third argument of that call is false.
// NOTE(review): the boolean presumably says whether dst carries an incoming
// value - confirm against reduceFloatMinMax.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19248
// Float min/max reduction of a 2-element vector when AVX10.2 is not supported,
// in the form where dst is also the first input of the match rule. The third
// argument of the reduceFloatMinMax call is true.
// NOTE(review): the boolean presumably says whether dst carries an incoming
// value - confirm against reduceFloatMinMax.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19267
19268
// Float min/max reduction of vectors with at least 4 elements when AVX10.2 is
// not supported, in the form where dst is also the first input of the match
// rule. The third argument of the reduceFloatMinMax call is true.
// NOTE(review): the boolean presumably says whether dst carries an incoming
// value - confirm against reduceFloatMinMax.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19287
// Float min/max reduction of a 2-element vector when AVX10.2 is supported.
// src1 is an immediate and must be the matching constant: TypeF::POS_INF for
// MinReductionV, TypeF::NEG_INF for MaxReductionV. src1 is not passed to
// reduceFloatMinMax; three of the temporary slots receive xnoreg and only
// xtmp1 is supplied. The third argument of the call is false.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19305
// Float min/max reduction of vectors with at least 4 elements when AVX10.2 is
// supported. src1 is an immediate and must be the matching constant:
// TypeF::POS_INF for MinReductionV, TypeF::NEG_INF for MaxReductionV. src1 is
// not passed to reduceFloatMinMax; three of the temporary slots receive xnoreg
// and xtmp1/xtmp2 are supplied. The third argument of the call is false.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19323
// Float min/max reduction of a 2-element vector when AVX10.2 is supported, in
// the form where dst is also the first input of the match rule. Three of the
// temporary slots of reduceFloatMinMax receive xnoreg and only xtmp1 is
// supplied. The third argument of the call is true.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19339
// Float min/max reduction of vectors with at least 4 elements when AVX10.2 is
// supported, in the form where dst is also the first input of the match rule.
// Three of the temporary slots of reduceFloatMinMax receive xnoreg; xtmp1 and
// xtmp2 are supplied. The third argument of the call is true.
//
// The format text is labelled "vector_minmaxF_reduction" because this rule is
// selected for vector_length >= 4; the "2F" label belongs to the 2-element
// rule. Only the printed listing text is affected by the label.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19355
//--------------------Min/Max Double Reduction --------------------
// Double min/max reduction of a 2-element vector when AVX10.2 is not
// supported. src1 is an immediate and must be the matching constant:
// TypeD::POS_INF for MinReductionV, TypeD::NEG_INF for MaxReductionV. src1 is
// not passed to reduceDoubleMinMax, and the third argument of that call is
// false.
// NOTE(review): the boolean presumably says whether dst carries an incoming
// value - confirm against reduceDoubleMinMax.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19377
// Double min/max reduction of vectors with at least 4 elements when AVX10.2 is
// not supported. src1 is an immediate and must be the matching constant:
// TypeD::POS_INF for MinReductionV, TypeD::NEG_INF for MaxReductionV. src1 is
// not passed to reduceDoubleMinMax, and the third argument of that call is
// false.
// NOTE(review): the boolean presumably says whether dst carries an incoming
// value - confirm against reduceDoubleMinMax.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19398
19399
// Double min/max reduction of a 2-element vector when AVX10.2 is not
// supported, in the form where dst is also the first input of the match rule.
// The third argument of the reduceDoubleMinMax call is true.
// NOTE(review): the boolean presumably says whether dst carries an incoming
// value - confirm against reduceDoubleMinMax.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19418
// Double min/max reduction of vectors with at least 4 elements when AVX10.2 is
// not supported, in the form where dst is also the first input of the match
// rule. The third argument of the reduceDoubleMinMax call is true.
// NOTE(review): the boolean presumably says whether dst carries an incoming
// value - confirm against reduceDoubleMinMax.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19437
// Double min/max reduction of a 2-element vector when AVX10.2 is supported.
// src1 is an immediate and must be the matching constant: TypeD::POS_INF for
// MinReductionV, TypeD::NEG_INF for MaxReductionV. src1 is not passed to
// reduceDoubleMinMax; three of the temporary slots receive xnoreg and only
// xtmp1 is supplied. The third argument of the call is false.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19455
// Double min/max reduction of vectors with at least 4 elements when AVX10.2 is
// supported. src1 is an immediate and must be the matching constant:
// TypeD::POS_INF for MinReductionV, TypeD::NEG_INF for MaxReductionV. src1 is
// not passed to reduceDoubleMinMax; three of the temporary slots receive
// xnoreg and xtmp1/xtmp2 are supplied. The third argument of the call is false.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19473
19474
// Double min/max reduction of a 2-element vector when AVX10.2 is supported, in
// the form where dst is also the first input of the match rule. Three of the
// temporary slots of reduceDoubleMinMax receive xnoreg and only xtmp1 is
// supplied. The third argument of the call is true.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19490
// Double min/max reduction of vectors with at least 4 elements when AVX10.2 is
// supported, in the form where dst is also the first input of the match rule.
// Three of the temporary slots of reduceDoubleMinMax receive xnoreg; xtmp1 and
// xtmp2 are supplied. The third argument of the call is true.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int num_elems = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19506
19507 // ====================VECTOR ARITHMETIC=======================================
19508
19509 // --------------------------------- ADD --------------------------------------
19510
19511 // Bytes vector add
// Packed byte add for UseAVX == 0. dst is both the first AddVB input and the
// result; the rule emits paddb on dst and src.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ paddb(accum, addend);
  %}
  ins_pipe(pipe_slow);
%}
19521
// Packed byte add for UseAVX > 0 with separate destination and two register
// sources. Emits vpaddb with the encoding returned by
// vector_length_encoding(this).
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int avx_len = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister,
              avx_len);
  %}
  ins_pipe(pipe_slow);
%}
19532
// Packed byte add for UseAVX > 0 where the second input is a LoadVector from
// memory. Only selected when the first input, in(1), is wider than 8 bytes.
// Emits vpaddb with an Address operand.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int avx_len = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src$$XMMRegister,
              $mem$$Address,
              avx_len);
  %}
  ins_pipe(pipe_slow);
%}
19544
19545 // Shorts/Chars vector add
// Non-AVX short/char add (UseAVX == 0). dst is tied to the first AddVS
// input, so the two-operand paddw accumulates into dst.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ paddw(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19555
// AVX short/char add (UseAVX > 0): three-operand vpaddw on src1 and src2,
// using the vector length encoding computed for this node.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpaddw(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19566
// AVX short/char add whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpaddw(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19578
19579 // Integers vector add
// Non-AVX int add (UseAVX == 0). dst is tied to the first AddVI input, so
// the two-operand paddd accumulates into dst.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ paddd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19589
// AVX int add (UseAVX > 0): three-operand vpaddd on src1 and src2, using
// the vector length encoding computed for this node.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpaddd(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19600
19601
// AVX int add whose second input is a LoadVector folded into the memory
// operand. Only matched when the first input vector is wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpaddd(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19613
19614 // Longs vector add
// Non-AVX long add (UseAVX == 0). dst is tied to the first AddVL input, so
// the two-operand paddq accumulates into dst.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ paddq(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19624
// AVX long add (UseAVX > 0): three-operand vpaddq on src1 and src2, using
// the vector length encoding computed for this node.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpaddq(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19635
// AVX long add whose second input is a LoadVector folded into the memory
// operand. Only matched when the first input vector is wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpaddq(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19647
19648 // Floats vector add
// Non-AVX float add (UseAVX == 0). dst is tied to the first AddVF input, so
// the two-operand addps accumulates into dst.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addps(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19658
// AVX float add (UseAVX > 0): three-operand vaddps on src1 and src2, using
// the vector length encoding computed for this node.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vaddps(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19669
// AVX float add whose second input is a LoadVector folded into the memory
// operand. Only matched when the first input vector is wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vaddps(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19681
19682 // Doubles vector add
// Non-AVX double add (UseAVX == 0). dst is tied to the first AddVD input,
// so the two-operand addpd accumulates into dst.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addpd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19692
// AVX double add (UseAVX > 0): three-operand vaddpd on src1 and src2, using
// the vector length encoding computed for this node.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vaddpd(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19703
// AVX double add whose second input is a LoadVector folded into the memory
// operand. Only matched when the first input vector is wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vaddpd(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19715
19716 // --------------------------------- SUB --------------------------------------
19717
19718 // Bytes vector sub
// Non-AVX byte subtract (UseAVX == 0). dst is tied to the first SubVB
// input, so the two-operand psubb subtracts src from dst in place.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ psubb(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19728
// AVX byte subtract (UseAVX > 0): three-operand vpsubb on src1 and src2,
// using the vector length encoding computed for this node.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpsubb(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19739
// AVX byte subtract whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpsubb(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19751
19752 // Shorts/Chars vector sub
// Non-AVX short/char subtract (UseAVX == 0). dst is tied to the first SubVS
// input, so the two-operand psubw subtracts src from dst in place.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ psubw(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19762
19763
// AVX short/char subtract (UseAVX > 0): three-operand vpsubw on src1 and
// src2, using the vector length encoding computed for this node.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpsubw(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19774
// AVX short/char subtract whose second input is a LoadVector folded into
// the memory operand. Only matched when the first input exceeds 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpsubw(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19786
19787 // Integers vector sub
// Non-AVX int subtract (UseAVX == 0). dst is tied to the first SubVI input,
// so the two-operand psubd subtracts src from dst in place.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ psubd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19797
// AVX int subtract (UseAVX > 0): three-operand vpsubd on src1 and src2,
// using the vector length encoding computed for this node.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpsubd(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19808
// AVX int subtract whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpsubd(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19820
19821 // Longs vector sub
// Non-AVX long subtract (UseAVX == 0). dst is tied to the first SubVL
// input, so the two-operand psubq subtracts src from dst in place.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ psubq(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19831
// AVX long subtract (UseAVX > 0): three-operand vpsubq on src1 and src2,
// using the vector length encoding computed for this node.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpsubq(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19842
19843
// AVX long subtract whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpsubq(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19855
19856 // Floats vector sub
// Non-AVX float subtract (UseAVX == 0). dst is tied to the first SubVF
// input, so the two-operand subps subtracts src from dst in place.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subps(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19866
// AVX float subtract (UseAVX > 0): three-operand vsubps on src1 and src2,
// using the vector length encoding computed for this node.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vsubps(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19877
// AVX float subtract whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vsubps(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19889
19890 // Doubles vector sub
// Non-AVX double subtract (UseAVX == 0). dst is tied to the first SubVD
// input, so the two-operand subpd subtracts src from dst in place.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subpd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19900
// AVX double subtract (UseAVX > 0): three-operand vsubpd on src1 and src2,
// using the vector length encoding computed for this node.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vsubpd(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19911
// AVX double subtract whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vsubpd(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19923
19924 // --------------------------------- MUL --------------------------------------
19925
19926 // Byte vector mul
// Byte multiply for vectors of at most 8 bytes. There is no packed byte
// multiply here, so the bytes are widened to words, multiplied with pmullw,
// masked down to their low byte and packed back. dst and xtmp are TEMPs.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister product = $dst$$XMMRegister;
    XMMRegister widened = $xtmp$$XMMRegister;
    // Widen both byte inputs to word lanes
    __ pmovsxbw(product, $src1$$XMMRegister);
    __ pmovsxbw(widened, $src2$$XMMRegister);
    // Word products, then clear the upper byte of every word lane
    __ pmullw(product, widened);
    __ psllw(product, 8);
    __ psrlw(product, 8);
    // Narrow the words back to bytes
    __ packuswb(product, product);
  %}
  ins_pipe( pipe_slow );
%}
19943
// Non-AVX byte multiply for vectors wider than 8 bytes. Odd and even byte
// lanes are multiplied separately as words with pmullw and then merged with
// por. dst and xtmp are TEMPs.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;

    // Odd-index bytes: shift each into the low byte of its word, multiply,
    // then move the product byte back up to the odd position.
    __ movdqu(result, lhs);
    __ psrlw(result, 8);
    __ movdqu(scratch, rhs);
    __ psrlw(scratch, 8);
    __ pmullw(result, scratch);
    __ psllw(result, 8);

    // Even-index bytes: multiply whole words and keep only the low byte.
    __ movdqu(scratch, lhs);
    __ pmullw(scratch, rhs);
    __ psllw(scratch, 8);
    __ psrlw(scratch, 8);

    // Merge the odd and even halves.
    __ por(result, scratch);
  %}
  ins_pipe( pipe_slow );
%}
19968
// AVX byte multiply for vectors wider than 8 bytes. Odd and even byte lanes
// are multiplied separately as words with vpmullw and then merged with vpor.
// xtmp1 and xtmp2 are TEMPs; dst is only written by the final vpor.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    XMMRegister even = $xtmp1$$XMMRegister;
    XMMRegister odd = $xtmp2$$XMMRegister;

    // Odd-index bytes: shift each into the low byte of its word, multiply,
    // then move the product byte back up to the odd position.
    __ vpsrlw(odd, lhs, 8, vec_enc);
    __ vpsrlw(even, rhs, 8, vec_enc);
    __ vpmullw(odd, even, odd, vec_enc);
    __ vpsllw(odd, odd, 8, vec_enc);

    // Even-index bytes: multiply whole words and keep only the low byte.
    __ vpmullw(even, lhs, rhs, vec_enc);
    __ vpsllw(even, even, 8, vec_enc);
    __ vpsrlw(even, even, 8, vec_enc);

    // Merge the even and odd halves into the destination.
    __ vpor($dst$$XMMRegister, even, odd, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19990
19991 // Shorts/Chars vector mul
// Non-AVX short/char multiply (UseAVX == 0). dst is tied to the first MulVS
// input, so the two-operand pmullw multiplies into dst.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ pmullw(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20001
// AVX short/char multiply (UseAVX > 0): three-operand vpmullw on src1 and
// src2, using the vector length encoding computed for this node.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpmullw(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20012
// AVX short/char multiply whose second input is a LoadVector folded into
// the memory operand. Only matched when the first input exceeds 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpmullw(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20024
20025 // Integers vector mul
// Non-AVX int multiply (UseAVX == 0). dst is tied to the first MulVI input,
// so the two-operand pmulld multiplies into dst. Asserts UseSSE > 3.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ pmulld(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20036
// AVX int multiply (UseAVX > 0): three-operand vpmulld on src1 and src2,
// using the vector length encoding computed for this node.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpmulld(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20047
// AVX int multiply whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vpmulld(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20059
20060 // Longs vector mul
// EVEX long multiply via evpmullq. Selected for 64-byte vectors when
// AVX512DQ is supported, or for any length when AVX512VL+DQ is supported.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evpmullq(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20075
// EVEX long multiply with the second input folded from a LoadVector.
// Selected for 64-byte vectors when AVX512DQ is supported, or for vectors
// wider than 8 bytes when AVX512VL+DQ is supported.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ evpmullq(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20091
// Non-AVX long multiply (UseAVX == 0), synthesized from 32-bit multiplies:
// the two cross (lo*hi) products are summed and shifted into the upper half,
// then the unsigned lo*lo product is added. dst and xtmp are TEMPs.
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;

    // Cross (lo*hi) products; only their low 32 bits are needed.
    __ pshufd(scratch, rhs, 0xB1);
    __ pmulld(scratch, lhs);
    __ pshufd(result, scratch, 0xB1);
    __ paddd(result, scratch);
    __ psllq(result, 32);

    // Add the lo*lo products.
    __ movdqu(scratch, lhs);
    __ pmuludq(scratch, rhs);
    __ paddq(result, scratch);
  %}
  ins_pipe( pipe_slow );
%}
20113
// AVX long multiply for targets without a usable evpmullq: 64-byte vectors
// without AVX512DQ, or shorter vectors without AVX512VL+DQ. The product is
// synthesized from 32-bit multiplies; xtmp1 and xtmp2 are TEMPs.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    XMMRegister low = $xtmp1$$XMMRegister;
    XMMRegister high = $xtmp2$$XMMRegister;

    // Cross (lo*hi) products; only their low 32 bits are needed.
    __ vpshufd(low, rhs, 0xB1, vec_enc);
    __ vpmulld(low, lhs, low, vec_enc);
    __ vpshufd(high, low, 0xB1, vec_enc);
    __ vpaddd(high, high, low, vec_enc);
    __ vpsllq(high, high, 32, vec_enc);

    // Add the lo*lo products and write the destination.
    __ vpmuludq(low, lhs, rhs, vec_enc);
    __ vpaddq($dst$$XMMRegister, low, high, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20138
// AVX long multiply for MulVL nodes that report has_uint_inputs(): a single
// vpmuludq is emitted, at ins_cost(100).
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpmuludq(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20150
// AVX long multiply for MulVL nodes that report has_int_inputs(): a single
// vpmuldq is emitted, at ins_cost(100).
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpmuldq(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20162
20163 // Floats vector mul
// Non-AVX float multiply (UseAVX == 0). dst is tied to the first MulVF
// input, so the two-operand mulps multiplies into dst.
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ mulps(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20173
// AVX float multiply (UseAVX > 0): three-operand vmulps on src1 and src2,
// using the vector length encoding computed for this node.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vmulps(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20184
// AVX float multiply whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vmulps(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20196
20197 // Doubles vector mul
// Non-AVX double multiply (UseAVX == 0). dst is tied to the first MulVD
// input, so the two-operand mulpd multiplies into dst.
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ mulpd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20207
// AVX double multiply (UseAVX > 0): three-operand vmulpd on src1 and src2,
// using the vector length encoding computed for this node.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vmulpd(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20218
// AVX double multiply whose second input is a LoadVector folded into the
// memory operand. Only matched when the first input is wider than 8 bytes.
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vmulpd(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20230
20231 // --------------------------------- DIV --------------------------------------
20232
20233 // Floats vector div
// Non-AVX float divide (UseAVX == 0). dst is tied to the first DivVF input,
// so the two-operand divps divides dst by src in place.
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister divisor = $src$$XMMRegister;
    __ divps(acc, divisor);
  %}
  ins_pipe( pipe_slow );
%}
20243
// AVX float divide (UseAVX > 0): three-operand vdivps on src1 and src2,
// using the vector length encoding computed for this node.
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor = $src2$$XMMRegister;
    __ vdivps(result, dividend, divisor, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20254
// AVX float divide whose divisor is a LoadVector folded into the memory
// operand. Only matched when the first input vector is wider than 8 bytes.
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivps(result, dividend, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20266
20267 // Doubles vector div
// Non-AVX double divide (UseAVX == 0). dst is tied to the first DivVD
// input, so the two-operand divpd divides dst by src in place.
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister divisor = $src$$XMMRegister;
    __ divpd(acc, divisor);
  %}
  ins_pipe( pipe_slow );
%}
20277
// AVX double divide (UseAVX > 0): three-operand vdivpd on src1 and src2,
// using the vector length encoding computed for this node.
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor = $src2$$XMMRegister;
    __ vdivpd(result, dividend, divisor, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20288
// AVX double divide whose divisor is a LoadVector folded into the memory
// operand. Only matched when the first input vector is wider than 8 bytes.
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivpd(result, dividend, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20300
20301 // ------------------------------ MinMax ---------------------------------------
20302
20303 // Byte, Short, Int vector Min/Max
// Non-AVX MinV/MaxV for integral element types other than long. dst is tied
// to the first input; pminmax is given the ideal opcode and element type to
// choose between min and max and the lane width.
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax $dst,$src\t! " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const int ideal_op = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    XMMRegister acc = $dst$$XMMRegister;
    __ pminmax(ideal_op, lane_bt, acc, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20319
// AVX MinV/MaxV for integral element types other than long. vpminmax is
// given the ideal opcode, element type and vector length encoding.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax $dst,$src1,$src2\t! " %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;

    __ vpminmax(ideal_op, lane_bt, result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20335
20336 // Long vector Min/Max
// Non-AVX MinV/MaxV for 16-byte long vectors. dst is tied to one of the
// inputs and is also declared TEMP; tmp is an rxmm0 operand handed to
// pminmax as scratch.
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const int ideal_op = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(lane_bt == T_LONG, "sanity");

    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister scratch = $tmp$$XMMRegister;
    __ pminmax(ideal_op, lane_bt, acc, $src$$XMMRegister, scratch);
  %}
  ins_pipe( pipe_slow );
%}
20355
// AVX MinV/MaxV for long vectors of at most 32 bytes when AVX512VL is not
// available. Operands are restricted to legVec and dst is declared TEMP.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const int ideal_op = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(lane_bt == T_LONG, "sanity");

    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpminmax(ideal_op, lane_bt, result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20373
// EVEX MinV/MaxV for long vectors: selected for 64-byte vectors, or for any
// length when AVX512VL is supported. vpminmax is given the ideal opcode,
// element type and vector length encoding.
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  // The second source needs the operand sigil, otherwise the listing prints
  // the literal text instead of the allocated register.
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20392
20393 // Float/Double vector Min/Max
// Float/double MinV/MaxV when AVX10.2 is supported. No temporaries are
// declared; vminmax_fp_avx10_2 is called with k0 as its mask register.
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP $dst, $a, $b" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const int ideal_op = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $a$$XMMRegister;
    XMMRegister rhs = $b$$XMMRegister;
    __ vminmax_fp_avx10_2(ideal_op, lane_bt, result, k0, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20408
20409 // Float/Double vector Min/Max
// AVX variant used when AVX10.2 is not available, for vectors of at most
// 32 bytes. All operands are legVec; three vector temporaries (tmp, atmp,
// btmp) are reserved and handed to the vminmax_fp helper.
20410 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20411 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20412 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20413 UseAVX > 0);
20414 match(Set dst (MinV a b));
20415 match(Set dst (MaxV a b));
20416 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20417 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20418 ins_encode %{
20419 assert(UseAVX > 0, "required");
20420
// MinV or MaxV; the helper dispatches on this value.
20421 int opcode = this->ideal_Opcode();
20422 int vlen_enc = vector_length_encoding(this);
20423 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20424
20425 __ vminmax_fp(opcode, elem_bt,
20426 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20427 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20428 %}
20429 ins_pipe( pipe_slow );
20430 %}
20431
// 64-byte Float/Double vector Min/Max used when AVX10.2 is not available.
// Reserves two vector temporaries (atmp, btmp) and one opmask temporary
// (ktmp); dst is also declared TEMP. All of them are passed to evminmax_fp.
20432 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20433 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20434 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20435 match(Set dst (MinV a b));
20436 match(Set dst (MaxV a b));
20437 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
// List every temporary declared in effect(), including the opmask register.
20438 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
20439 ins_encode %{
20440 assert(UseAVX > 2, "required");
20441
// MinV or MaxV; the helper dispatches on this value.
20442 int opcode = this->ideal_Opcode();
20443 int vlen_enc = vector_length_encoding(this);
20444 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20445
20446 __ evminmax_fp(opcode, elem_bt,
20447 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20448 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20449 %}
20450 ins_pipe( pipe_slow );
20451 %}
20452
20453 // ------------------------------ Unsigned vector Min/Max ----------------------
20454
// Unsigned vector Min/Max, register-register form.
// Applies when AVX512VL is supported, or for any element type other than
// T_LONG. The ideal opcode (UMinV or UMaxV) is forwarded to vpuminmax.
20455 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20456 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20457 match(Set dst (UMinV a b));
20458 match(Set dst (UMaxV a b));
20459 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20460 ins_encode %{
20461 int opcode = this->ideal_Opcode();
20462 int vlen_enc = vector_length_encoding(this);
20463 BasicType elem_bt = Matcher::vector_element_basic_type(this);
// Unsigned min/max is only meaningful for integral element types.
20464 assert(is_integral_type(elem_bt), "");
20465 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20466 %}
20467 ins_pipe( pipe_slow );
20468 %}
20469
// Unsigned vector Min/Max with the second input folded from a LoadVector.
// Same predicate as the register-register form: AVX512VL is supported, or the
// element type is not T_LONG. The memory operand is passed as an Address.
20470 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20471 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20472 match(Set dst (UMinV a (LoadVector b)));
20473 match(Set dst (UMaxV a (LoadVector b)));
20474 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20475 ins_encode %{
20476 int opcode = this->ideal_Opcode();
20477 int vlen_enc = vector_length_encoding(this);
20478 BasicType elem_bt = Matcher::vector_element_basic_type(this);
// Unsigned min/max is only meaningful for integral element types.
20479 assert(is_integral_type(elem_bt), "");
20480 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20481 %}
20482 ins_pipe( pipe_slow );
20483 %}
20484
// Unsigned long vector Min/Max for targets without AVX512VL.
// Complements the generic unsigned rules, whose predicate excludes T_LONG
// when AVX512VL is missing. Two vector temporaries are reserved and passed to
// the vpuminmaxq helper together with the ideal opcode (UMinV or UMaxV).
20485 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20486 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20487 match(Set dst (UMinV a b));
20488 match(Set dst (UMaxV a b));
20489 effect(TEMP xtmp1, TEMP xtmp2);
// The temporaries need their '$' sigil so the allocated registers are printed.
20490 format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20491 ins_encode %{
20492 int opcode = this->ideal_Opcode();
20493 int vlen_enc = vector_length_encoding(this);
20494 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20495 %}
20496 ins_pipe( pipe_slow );
20497 %}
20498
// Masked unsigned vector Min/Max, register form.
// dst is both the first input and the result; the opmask register `mask`
// is the predicate input of the ideal node. Emitted through evmasked_op.
20499 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20500 match(Set dst (UMinV (Binary dst src2) mask));
20501 match(Set dst (UMaxV (Binary dst src2) mask));
20502 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20503 ins_encode %{
20504 int vlen_enc = vector_length_encoding(this);
20505 BasicType bt = Matcher::vector_element_basic_type(this);
20506 int opc = this->ideal_Opcode();
// NOTE(review): the literal `true` is presumably the merge flag of
// evmasked_op -- confirm against the helper's declaration.
20507 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20508 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20509 %}
20510 ins_pipe( pipe_slow );
20511 %}
20512
// Masked unsigned vector Min/Max with the second input folded from a
// LoadVector. dst is both the first input and the result; the memory operand
// is passed to evmasked_op as an Address.
20513 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20514 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20515 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20516 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20517 ins_encode %{
20518 int vlen_enc = vector_length_encoding(this);
20519 BasicType bt = Matcher::vector_element_basic_type(this);
20520 int opc = this->ideal_Opcode();
// NOTE(review): the literal `true` is presumably the merge flag of
// evmasked_op -- confirm against the helper's declaration.
20521 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20522 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20523 %}
20524 ins_pipe( pipe_slow );
20525 %}
20526
20527 // --------------------------------- Signum/CopySign ---------------------------
20528
// Scalar float signum. dst is both the input and the result; the `zero` and
// `one` operands come from the ideal node's Binary input and are passed to
// signum_fp as registers. The flags register is declared killed.
20529 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20530 match(Set dst (SignumF dst (Binary zero one)));
20531 effect(KILL cr);
20532 format %{ "signumF $dst, $dst" %}
20533 ins_encode %{
// The opcode lets the shared helper distinguish the float and double forms.
20534 int opcode = this->ideal_Opcode();
20535 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20536 %}
20537 ins_pipe( pipe_slow );
20538 %}
20539
// Scalar double signum. dst is both the input and the result; the `zero` and
// `one` operands come from the ideal node's Binary input and are passed to
// signum_fp as registers. The flags register is declared killed.
20540 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20541 match(Set dst (SignumD dst (Binary zero one)));
20542 effect(KILL cr);
20543 format %{ "signumD $dst, $dst" %}
20544 ins_encode %{
// The opcode lets the shared helper distinguish the float and double forms.
20545 int opcode = this->ideal_Opcode();
20546 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20547 %}
20548 ins_pipe( pipe_slow );
20549 %}
20550
// Vector signum (float and double) for targets without AVX512VL and vectors
// of at most 32 bytes. Uses one vector temporary; dst is also declared TEMP.
20551 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20552 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20553 match(Set dst (SignumVF src (Binary zero one)));
20554 match(Set dst (SignumVD src (Binary zero one)));
20555 effect(TEMP dst, TEMP xtmp1);
20556 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20557 ins_encode %{
// SignumVF or SignumVD; the helper dispatches on this value.
20558 int opcode = this->ideal_Opcode();
20559 int vec_enc = vector_length_encoding(this);
20560 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20561 $xtmp1$$XMMRegister, vec_enc);
20562 %}
20563 ins_pipe( pipe_slow );
20564 %}
20565
// Vector signum (float and double), EVEX form: used when AVX512VL is
// supported or the vector is 64 bytes long. Uses an opmask temporary instead
// of a vector temporary; dst is also declared TEMP.
20566 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20567 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20568 match(Set dst (SignumVF src (Binary zero one)));
20569 match(Set dst (SignumVD src (Binary zero one)));
20570 effect(TEMP dst, TEMP ktmp1);
20571 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20572 ins_encode %{
// SignumVF or SignumVD; the helper dispatches on this value.
20573 int opcode = this->ideal_Opcode();
20574 int vec_enc = vector_length_encoding(this);
20575 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20576 $ktmp1$$KRegister, vec_enc);
20577 %}
20578 ins_pipe( pipe_slow );
20579 %}
20580
20581 // ---------------------------------------
20582 // For copySign use 0xE4 as the truth-table immediate (imm8) for vpternlog
20583 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20584 // C (xmm2) is set to 0x7FFFFFFF
20585 // Wherever xmm2 is 0, we want to pick from B (sign)
20586 // Wherever xmm2 is 1, we want to pick from A (src)
20587 //
20588 // A B C Result
20589 // 0 0 0 0
20590 // 0 0 1 0
20591 // 0 1 0 1
20592 // 0 1 1 0
20593 // 1 0 0 0
20594 // 1 0 1 1
20595 // 1 1 0 1
20596 // 1 1 1 1
20597 //
20598 // Result going from high bit to low bit is 0b11100100 = 0xe4
20599 // ---------------------------------------
20600
// Scalar float copySign. dst is both the magnitude input and the result; src
// is the second CopySignF input. A general-purpose and an XMM temporary are
// used to materialize the 0x7FFFFFFF selector constant.
20601 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20602 match(Set dst (CopySignF dst src));
20603 effect(TEMP tmp1, TEMP tmp2);
20604 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20605 ins_encode %{
// Build the constant in a GPR, then move it into the XMM temporary.
20606 __ movl($tmp2$$Register, 0x7FFFFFFF);
20607 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
// Bitwise select between dst and src, controlled by tmp1, with immediate 0xE4.
20608 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20609 %}
20610 ins_pipe( pipe_slow );
20611 %}
20612
// Scalar double copySign. The ideal node carries (Binary src zero); the immD
// `zero` operand takes part in matching only and is not referenced by the
// encoding. dst is both the magnitude input and the result.
20613 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20614 match(Set dst (CopySignD dst (Binary src zero)));
20615 ins_cost(100);
20616 effect(TEMP tmp1, TEMP tmp2);
20617 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20618 ins_encode %{
// Build the 64-bit selector constant in a GPR, then move it into the XMM temporary.
20619 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20620 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
// Bitwise select between dst and src, controlled by tmp1, with immediate 0xE4.
20621 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20622 %}
20623 ins_pipe( pipe_slow );
20624 %}
20625
20626 //----------------------------- CompressBits/ExpandBits ------------------------
20627
// Int CompressBits with a register mask, emitted as a single pextl.
// The predicate restricts the rule to nodes whose result type is an int.
20628 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20629 predicate(n->bottom_type()->isa_int());
20630 match(Set dst (CompressBits src mask));
20631 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20632 ins_encode %{
20633 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20634 %}
20635 ins_pipe( pipe_slow );
20636 %}
20637
// Int ExpandBits with a register mask, emitted as a single pdepl.
// The predicate restricts the rule to nodes whose result type is an int.
20638 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20639 predicate(n->bottom_type()->isa_int());
20640 match(Set dst (ExpandBits src mask));
20641 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20642 ins_encode %{
20643 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20644 %}
20645 ins_pipe( pipe_slow );
20646 %}
20647
// Int CompressBits with the mask folded from a LoadI; pextl takes the mask
// as a memory operand. Restricted to nodes whose result type is an int.
20648 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20649 predicate(n->bottom_type()->isa_int());
20650 match(Set dst (CompressBits src (LoadI mask)));
20651 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20652 ins_encode %{
20653 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20654 %}
20655 ins_pipe( pipe_slow );
20656 %}
20657
// Int ExpandBits with the mask folded from a LoadI; pdepl takes the mask
// as a memory operand. Restricted to nodes whose result type is an int.
20658 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20659 predicate(n->bottom_type()->isa_int());
20660 match(Set dst (ExpandBits src (LoadI mask)));
20661 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20662 ins_encode %{
20663 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20664 %}
20665 ins_pipe( pipe_slow );
20666 %}
20667
20668 // --------------------------------- Sqrt --------------------------------------
20669
// Packed float square root, register source. Emits vsqrtps with the vector
// length encoding derived from this node; asserts that AVX is enabled.
20670 instruct vsqrtF_reg(vec dst, vec src) %{
20671 match(Set dst (SqrtVF src));
20672 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
20673 ins_encode %{
20674 assert(UseAVX > 0, "required");
20675 int vlen_enc = vector_length_encoding(this);
20676 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20677 %}
20678 ins_pipe( pipe_slow );
20679 %}
20680
// Packed float square root with the source folded from a LoadVector.
// Only used when the loaded vector (input 1 of the node) is wider than
// 8 bytes.
20681 instruct vsqrtF_mem(vec dst, memory mem) %{
20682 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20683 match(Set dst (SqrtVF (LoadVector mem)));
20684 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
20685 ins_encode %{
20686 assert(UseAVX > 0, "required");
20687 int vlen_enc = vector_length_encoding(this);
20688 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
20689 %}
20690 ins_pipe( pipe_slow );
20691 %}
20692
20693 // Floating point vector sqrt
// Packed double square root, register source. Emits vsqrtpd with the vector
// length encoding derived from this node; asserts that AVX is enabled.
20694 instruct vsqrtD_reg(vec dst, vec src) %{
20695 match(Set dst (SqrtVD src));
20696 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
20697 ins_encode %{
20698 assert(UseAVX > 0, "required");
20699 int vlen_enc = vector_length_encoding(this);
20700 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20701 %}
20702 ins_pipe( pipe_slow );
20703 %}
20704
// Packed double square root with the source folded from a LoadVector.
// Only used when the loaded vector (input 1 of the node) is wider than
// 8 bytes.
20705 instruct vsqrtD_mem(vec dst, memory mem) %{
20706 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20707 match(Set dst (SqrtVD (LoadVector mem)));
20708 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
20709 ins_encode %{
20710 assert(UseAVX > 0, "required");
20711 int vlen_enc = vector_length_encoding(this);
20712 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
20713 %}
20714 ins_pipe( pipe_slow );
20715 %}
20716
20717 // ------------------------------ Shift ---------------------------------------
20718
20719 // Left and right shift count vectors are the same on x86
20720 // (only lowest bits of xmm reg are used for count).
// Materializes a scalar shift count in a vector register. One rule serves
// both LShiftCntV and RShiftCntV: the int count is moved into dst with movdl.
20721 instruct vshiftcnt(vec dst, rRegI cnt) %{
20722 match(Set dst (LShiftCntV cnt));
20723 match(Set dst (RShiftCntV cnt));
20724 format %{ "movdl $dst,$cnt\t! load shift count" %}
20725 ins_encode %{
20726 __ movdl($dst$$XMMRegister, $cnt$$Register);
20727 %}
20728 ins_pipe( pipe_slow );
20729 %}
20730
20731 // Byte vector shift
// Uniform-count byte shift for vectors of at most 8 elements (the variable
// shift forms are handled by other rules). The sequence extends the bytes
// into tmp, shifts them as words, masks the result with
// vector_short_to_byte_mask and packs it back into dst.
20732 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
20733 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
20734 match(Set dst ( LShiftVB src shift));
20735 match(Set dst ( RShiftVB src shift));
20736 match(Set dst (URShiftVB src shift));
20737 effect(TEMP dst, USE src, USE shift, TEMP tmp);
20738 format %{"vector_byte_shift $dst,$src,$shift" %}
20739 ins_encode %{
20740 assert(UseSSE > 3, "required");
20741 int opcode = this->ideal_Opcode();
// Sign-extend for every operation except the unsigned right shift.
20742 bool sign = (opcode != Op_URShiftVB);
20743 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
20744 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
// Load the mask into dst, apply it to the shifted words, then pack.
20745 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
20746 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
20747 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20748 %}
20749 ins_pipe( pipe_slow );
20750 %}
20751
// Uniform-count byte shift for 16-element vectors when UseAVX <= 1.
// The source is handled in two parts: src itself is extended and shifted in
// tmp1, and a pshufd(0xE) copy of src is extended and shifted in tmp2. Both
// results are masked with vector_short_to_byte_mask and packed into dst.
20752 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
20753 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
20754 UseAVX <= 1);
20755 match(Set dst ( LShiftVB src shift));
20756 match(Set dst ( RShiftVB src shift));
20757 match(Set dst (URShiftVB src shift));
20758 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
20759 format %{"vector_byte_shift $dst,$src,$shift" %}
20760 ins_encode %{
20761 assert(UseSSE > 3, "required");
20762 int opcode = this->ideal_Opcode();
// Sign-extend for every operation except the unsigned right shift.
20763 bool sign = (opcode != Op_URShiftVB);
20764 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
20765 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
// Shuffle control 0xE places the upper 64 bits of src in the low half of tmp2.
20766 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
20767 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
20768 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
// dst holds the mask; apply it to both halves before packing them together.
20769 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
20770 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
20771 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
20772 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
20773 %}
20774 ins_pipe( pipe_slow );
20775 %}
20776
// Uniform-count byte shift for 16-element vectors when UseAVX > 1.
// The whole source is extended into tmp and shifted using the 256-bit
// encoding, masked with vector_short_to_byte_mask, and then the upper
// 128 bits are extracted so both halves can be packed into dst.
20777 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
20778 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
20779 UseAVX > 1);
20780 match(Set dst ( LShiftVB src shift));
20781 match(Set dst ( RShiftVB src shift));
20782 match(Set dst (URShiftVB src shift));
20783 effect(TEMP dst, TEMP tmp);
20784 format %{"vector_byte_shift $dst,$src,$shift" %}
20785 ins_encode %{
20786 int opcode = this->ideal_Opcode();
// Sign-extend for every operation except the unsigned right shift.
20787 bool sign = (opcode != Op_URShiftVB);
20788 int vlen_enc = Assembler::AVX_256bit;
20789 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
20790 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20791 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
// Pack tmp with its own upper 128 bits (held in dst); the final pack uses
// length encoding 0.
20792 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
20793 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
20794 %}
20795 ins_pipe( pipe_slow );
20796 %}
20797
// Uniform-count byte shift for 32-element vectors (asserts UseAVX > 1).
// The upper 128 bits of src are processed in tmp and src itself in dst, each
// extended and shifted with the 256-bit encoding, masked with
// vector_short_to_byte_mask, packed together and finally permuted.
20798 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
20799 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
20800 match(Set dst ( LShiftVB src shift));
20801 match(Set dst ( RShiftVB src shift));
20802 match(Set dst (URShiftVB src shift));
20803 effect(TEMP dst, TEMP tmp);
20804 format %{"vector_byte_shift $dst,$src,$shift" %}
20805 ins_encode %{
20806 assert(UseAVX > 1, "required");
20807 int opcode = this->ideal_Opcode();
// Sign-extend for every operation except the unsigned right shift.
20808 bool sign = (opcode != Op_URShiftVB);
20809 int vlen_enc = Assembler::AVX_256bit;
20810 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
20811 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
20812 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20813 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20814 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20815 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
20816 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
20817 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
// NOTE(review): the 0xD8 permute presumably restores element order after the
// per-128-bit-lane pack -- confirm against the vpackuswb/vpermq semantics.
20818 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
20819 %}
20820 ins_pipe( pipe_slow );
20821 %}
20822
// Uniform-count byte shift for 64-element vectors (asserts UseAVX > 2).
// The upper 256 bits of src are processed in tmp1 and src itself in tmp2,
// each extended and shifted with the 512-bit encoding. dst first serves as
// the broadcast mask register, then receives the packed and permuted result.
20823 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
20824 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
20825 match(Set dst ( LShiftVB src shift));
20826 match(Set dst (RShiftVB src shift));
20827 match(Set dst (URShiftVB src shift));
20828 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
20829 format %{"vector_byte_shift $dst,$src,$shift" %}
20830 ins_encode %{
20831 assert(UseAVX > 2, "required");
20832 int opcode = this->ideal_Opcode();
// Sign-extend for every operation except the unsigned right shift.
20833 bool sign = (opcode != Op_URShiftVB);
20834 int vlen_enc = Assembler::AVX_512bit;
20835 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
20836 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
20837 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
20838 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20839 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
// Load the short-to-byte mask and broadcast it across the full 512-bit dst.
20840 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
20841 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
20842 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
20843 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
20844 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
// tmp2 is free again here and is reused for the permutation indices.
20845 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
20846 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
20847 %}
20848 ins_pipe( pipe_slow );
20849 %}
20850
20851 // A logical right shift of a short vector produces an incorrect Java result
20852 // for negative data, because Java code converts a short value into an int with
20853 // sign extension before the shift. Char vectors are fine since chars are
20854 // unsigned values.
20855 // Shorts/Chars vector shift (left, arithmetic right and logical right)
// Uniform-count shift of short/char vectors; matches the left, arithmetic
// right and logical right shift nodes. With AVX a three-operand vshiftw is
// emitted; otherwise src is first copied into dst and shifted in place.
20856 instruct vshiftS(vec dst, vec src, vec shift) %{
20857 predicate(!n->as_ShiftV()->is_var_shift());
20858 match(Set dst ( LShiftVS src shift));
20859 match(Set dst ( RShiftVS src shift));
20860 match(Set dst (URShiftVS src shift));
20861 effect(TEMP dst, USE src, USE shift);
20862 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
20863 ins_encode %{
20864 int opcode = this->ideal_Opcode();
20865 if (UseAVX > 0) {
20866 int vlen_enc = vector_length_encoding(this);
20867 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20868 } else {
// Non-AVX path: the copy instruction is chosen by element count
// (movflt for 2, movdbl for 4, movdqu for 8 shorts).
20869 int vlen = Matcher::vector_length(this);
20870 if (vlen == 2) {
20871 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
20872 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20873 } else if (vlen == 4) {
20874 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
20875 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20876 } else {
20877 assert (vlen == 8, "sanity");
20878 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20879 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20880 }
20881 }
20882 %}
20883 ins_pipe( pipe_slow );
20884 %}
20885
20886 // Integers vector shift (left, arithmetic right and logical right)
// Uniform-count shift of int vectors; matches the left, arithmetic right and
// logical right shift nodes. With AVX a three-operand vshiftd is emitted;
// otherwise src is first copied into dst and shifted in place.
20887 instruct vshiftI(vec dst, vec src, vec shift) %{
20888 predicate(!n->as_ShiftV()->is_var_shift());
20889 match(Set dst ( LShiftVI src shift));
20890 match(Set dst ( RShiftVI src shift));
20891 match(Set dst (URShiftVI src shift));
20892 effect(TEMP dst, USE src, USE shift);
20893 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
20894 ins_encode %{
20895 int opcode = this->ideal_Opcode();
20896 if (UseAVX > 0) {
20897 int vlen_enc = vector_length_encoding(this);
20898 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20899 } else {
// Non-AVX path: movdbl copies 2 ints, movdqu copies 4 ints.
20900 int vlen = Matcher::vector_length(this);
20901 if (vlen == 2) {
20902 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
20903 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20904 } else {
20905 assert(vlen == 4, "sanity");
20906 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20907 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20908 }
20909 }
20910 %}
20911 ins_pipe( pipe_slow );
20912 %}
20913
20914 // Integers vector constant shift (left, arithmetic right and logical right)
// Shift of int vectors by an 8-bit immediate count. Matches a shift node whose
// count input is an LShiftCntV/RShiftCntV of a constant, so the count is
// encoded in the instruction instead of being loaded into a vector register.
20915 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
20916 match(Set dst (LShiftVI src (LShiftCntV shift)));
20917 match(Set dst (RShiftVI src (RShiftCntV shift)));
20918 match(Set dst (URShiftVI src (RShiftCntV shift)));
20919 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
20920 ins_encode %{
20921 int opcode = this->ideal_Opcode();
20922 if (UseAVX > 0) {
20923 int vector_len = vector_length_encoding(this);
20924 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
20925 } else {
// Non-AVX path: copy src into dst (movdbl for 2 ints, movdqu for 4),
// then shift dst in place.
20926 int vlen = Matcher::vector_length(this);
20927 if (vlen == 2) {
20928 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
20929 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
20930 } else {
20931 assert(vlen == 4, "sanity");
20932 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20933 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
20934 }
20935 }
20936 %}
20937 ins_pipe( pipe_slow );
20938 %}
20939
20940 // Longs vector shift
// Uniform-count left and logical right shift of long vectors. The arithmetic
// right shift (RShiftVL) is not matched by this rule. With AVX a
// three-operand vshiftq is emitted; otherwise only 2-element vectors are
// expected and src is copied into dst before shifting in place.
20941 instruct vshiftL(vec dst, vec src, vec shift) %{
20942 predicate(!n->as_ShiftV()->is_var_shift());
20943 match(Set dst ( LShiftVL src shift));
20944 match(Set dst (URShiftVL src shift));
20945 effect(TEMP dst, USE src, USE shift);
20946 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
20947 ins_encode %{
20948 int opcode = this->ideal_Opcode();
20949 if (UseAVX > 0) {
20950 int vlen_enc = vector_length_encoding(this);
20951 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20952 } else {
20953 assert(Matcher::vector_length(this) == 2, "");
20954 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20955 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20956 }
20957 %}
20958 ins_pipe( pipe_slow );
20959 %}
20960
20961 // Longs vector constant shift
// Left and logical right shift of long vectors by an 8-bit immediate count.
// Matches a shift node whose count input is an LShiftCntV/RShiftCntV of a
// constant. The arithmetic right shift is not matched by this rule.
20962 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
20963 match(Set dst (LShiftVL src (LShiftCntV shift)));
20964 match(Set dst (URShiftVL src (RShiftCntV shift)));
20965 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
20966 ins_encode %{
20967 int opcode = this->ideal_Opcode();
20968 if (UseAVX > 0) {
20969 int vector_len = vector_length_encoding(this);
20970 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
20971 } else {
// Non-AVX path: only 2-element vectors are expected; copy, then shift in place.
20972 assert(Matcher::vector_length(this) == 2, "");
20973 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20974 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
20975 }
20976 %}
20977 ins_pipe( pipe_slow );
20978 %}
20979
20980 // -------------------ArithmeticRightShift -----------------------------------
20981 // Long vector arithmetic right shift
// Arithmetic right shift of long vectors for UseAVX <= 2, built from logical
// shifts: with m = (sign mask >>> shift), the result is
// ((src >>> shift) ^ m) - m. Handles 2-element vectors with SSE-style
// two-operand instructions and 4-element vectors with 256-bit AVX forms.
20982 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
20983 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
20984 match(Set dst (RShiftVL src shift));
20985 effect(TEMP dst, TEMP tmp);
20986 format %{ "vshiftq $dst,$src,$shift" %}
20987 ins_encode %{
20988 uint vlen = Matcher::vector_length(this);
20989 if (vlen == 2) {
// dst = src >>> shift
20990 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20991 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
// tmp = sign mask >>> shift
20992 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
20993 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
// dst = (dst ^ tmp) - tmp
20994 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
20995 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
20996 } else {
20997 assert(vlen == 4, "sanity");
20998 assert(UseAVX > 1, "required");
20999 int vlen_enc = Assembler::AVX_256bit;
// Same sequence as above using three-operand 256-bit instructions.
21000 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21001 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21002 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21003 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21004 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21005 }
21006 %}
21007 ins_pipe( pipe_slow );
21008 %}
21009
// Arithmetic right shift of long vectors for UseAVX > 2: emitted as a single
// evpsraq, so no temporary is needed.
21010 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21011 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21012 match(Set dst (RShiftVL src shift));
21013 format %{ "vshiftq $dst,$src,$shift" %}
21014 ins_encode %{
21015 int vlen_enc = vector_length_encoding(this);
21016 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21017 %}
21018 ins_pipe( pipe_slow );
21019 %}
21020
21021 // ------------------- Variable Shift -----------------------------
21022 // Byte variable shift
// Variable (per-element count) byte shift for vectors of at most 8 elements
// on targets without AVX512BW. varshiftbw leaves its result in dst, which is
// then packed with itself by vpackuswb.
21023 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21024 predicate(Matcher::vector_length(n) <= 8 &&
21025 n->as_ShiftV()->is_var_shift() &&
21026 !VM_Version::supports_avx512bw());
21027 match(Set dst ( LShiftVB src shift));
21028 match(Set dst ( RShiftVB src shift));
21029 match(Set dst (URShiftVB src shift));
21030 effect(TEMP dst, TEMP vtmp);
21031 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21032 ins_encode %{
21033 assert(UseAVX >= 2, "required");
21034
21035 int opcode = this->ideal_Opcode();
21036 int vlen_enc = Assembler::AVX_128bit;
21037 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
// The pack uses length encoding 0 with dst as both sources.
21038 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21039 %}
21040 ins_pipe( pipe_slow );
21041 %}
21042
// Variable (per-element count) byte shift for 16-element vectors on targets
// without AVX512BW. The lower and upper halves of src/shift are shifted
// separately with varshiftbw and the two word results are packed into dst.
21043 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21044 predicate(Matcher::vector_length(n) == 16 &&
21045 n->as_ShiftV()->is_var_shift() &&
21046 !VM_Version::supports_avx512bw());
21047 match(Set dst ( LShiftVB src shift));
21048 match(Set dst ( RShiftVB src shift));
21049 match(Set dst (URShiftVB src shift));
21050 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21051 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21052 ins_encode %{
21053 assert(UseAVX >= 2, "required");
21054
21055 int opcode = this->ideal_Opcode();
21056 int vlen_enc = Assembler::AVX_128bit;
21057 // Shift lower half and get word result in dst
21058 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21059
21060 // Shift upper half and get word result in vtmp1
// Both the data and the per-element counts are shuffled down with 0xE.
21061 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21062 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
// vtmp2 is passed both as the count input and as the helper's scratch register.
21063 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21064
21065 // Merge and down convert the two word results to byte in dst
21066 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21067 %}
21068 ins_pipe( pipe_slow );
21069 %}
21070
// Variable (per-element count) byte shift for 32-element vectors on targets
// without AVX512BW. Each 128-bit half of src/shift is handled like the
// 16-element case (two varshiftbw calls plus a pack); the two 128-bit results
// are then combined with vinserti128.
21071 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21072 predicate(Matcher::vector_length(n) == 32 &&
21073 n->as_ShiftV()->is_var_shift() &&
21074 !VM_Version::supports_avx512bw());
21075 match(Set dst ( LShiftVB src shift));
21076 match(Set dst ( RShiftVB src shift));
21077 match(Set dst (URShiftVB src shift));
21078 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
// Use the same "\n\t! using ..." comment marker as the sibling byte-shift rules.
21079 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21080 ins_encode %{
21081 assert(UseAVX >= 2, "required");
21082
21083 int opcode = this->ideal_Opcode();
21084 int vlen_enc = Assembler::AVX_128bit;
21085 // Process lower 128 bits and get result in dst
21086 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21087 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21088 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21089 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21090 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21091
21092 // Process higher 128 bits and get result in vtmp3
21093 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21094 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21095 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21096 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21097 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21098 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
// The packed upper-half result ends up in vtmp1, which is inserted below.
21099 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21100
21101 // Merge the two results in dst
21102 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21103 %}
21104 ins_pipe( pipe_slow );
21105 %}
21106
// Variable (per-element count) byte shift for vectors of at most 32 elements
// when AVX512BW is supported. The whole operation is delegated to evarshiftb
// with one vector temporary.
21107 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21108 predicate(Matcher::vector_length(n) <= 32 &&
21109 n->as_ShiftV()->is_var_shift() &&
21110 VM_Version::supports_avx512bw());
21111 match(Set dst ( LShiftVB src shift));
21112 match(Set dst ( RShiftVB src shift));
21113 match(Set dst (URShiftVB src shift));
21114 effect(TEMP dst, TEMP vtmp);
21115 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21116 ins_encode %{
21117 assert(UseAVX > 2, "required");
21118
21119 int opcode = this->ideal_Opcode();
21120 int vlen_enc = vector_length_encoding(this);
21121 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21122 %}
21123 ins_pipe( pipe_slow );
21124 %}
21125
// Variable (per-element count) byte shift for 64-element vectors when
// AVX512BW is supported. evarshiftb is invoked twice with the 256-bit
// encoding: once on src/shift directly and once on their extracted upper
// 256 bits; the second result is inserted into the upper half of dst.
21126 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21127 predicate(Matcher::vector_length(n) == 64 &&
21128 n->as_ShiftV()->is_var_shift() &&
21129 VM_Version::supports_avx512bw());
21130 match(Set dst ( LShiftVB src shift));
21131 match(Set dst ( RShiftVB src shift));
21132 match(Set dst (URShiftVB src shift));
21133 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21134 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21135 ins_encode %{
21136 assert(UseAVX > 2, "required");
21137
21138 int opcode = this->ideal_Opcode();
21139 int vlen_enc = Assembler::AVX_256bit;
21140 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21141 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21142 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
// vtmp2 is passed both as the count input and as the helper's scratch register.
21143 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21144 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21145 %}
21146 ins_pipe( pipe_slow );
21147 %}
21148
21149 // Short variable shift
// Per-lane (variable) shift of short vectors with at most 8 lanes when AVX512BW is
// not available. Serves LShiftVS, RShiftVS and URShiftVS.
//
// Sequence: extend the shorts to ints (sign-extending unless the opcode is
// URShiftVS), zero-extend the shift counts to ints, shift with varshiftd, mask
// with vector_int_to_short_mask() and pack the ints back into shorts.
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  // The rule covers all three shift kinds, so the text does not say "left";
  // the temporary is listed like in the byte variants.
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;

    // Widen values and counts to int lanes and shift there.
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Mask each int before packing so the unsigned-saturating pack does not clamp.
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Pack low and high 128-bit halves into one 128-bit vector of shorts.
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21174
// Per-lane (variable) shift of 16-lane short vectors when AVX512BW is not
// available. Serves LShiftVS, RShiftVS and URShiftVS.
//
// Each half of the input is extended to ints (sign-extending unless the opcode is
// URShiftVS), shifted with varshiftd and masked with vector_int_to_short_mask();
// the two int results are then packed and permuted into dst.
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  // The rule covers all three shift kinds, so the text does not say "left";
  // the temporaries are listed like in the byte variants.
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;

    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst. The 256-bit pack works per
    // 128-bit lane, so the quadwords are reordered afterwards (selector 0xD8).
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21210
// Per-lane (variable) shift of short vectors when AVX512BW is available. Serves
// LShiftVS, RShiftVS and URShiftVS through a single varshiftw call. The predicate
// places no restriction on the vector length.
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int node_enc = vector_length_encoding(this);
    // When AVX512VL is missing the 512-bit encoding is used regardless of node size.
    const int vec_enc  = VM_Version::supports_avx512vl() ? node_enc : Assembler::AVX_512bit;
    __ varshiftw(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21230
21231 //Integer variable shift
// Per-lane (variable) shift of int vectors. Serves LShiftVI, RShiftVI and
// URShiftVI; the ideal opcode is forwarded to varshiftd.
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister value  = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc  = vector_length_encoding(this);
    __ varshiftd(ideal_op, result, value, counts, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21247
21248 //Long variable shift
// Per-lane (variable) left and logical-right shift of long vectors (LShiftVL and
// URShiftVL). The arithmetic right shift is not matched by this rule.
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister value  = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc  = vector_length_encoding(this);
    __ varshiftq(ideal_op, result, value, counts, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21263
21264 //Long variable right shift arithmetic
// Per-lane (variable) arithmetic right shift of long vectors (RShiftVL) with at
// most 4 lanes when UseAVX == 2. Uses the varshiftq overload that takes an extra
// temporary vector register.
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister value   = $src$$XMMRegister;
    const XMMRegister counts  = $shift$$XMMRegister;
    const XMMRegister scratch = $vtmp$$XMMRegister;
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc  = vector_length_encoding(this);
    __ varshiftq(ideal_op, result, value, counts, vec_enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
21280
// Per-lane (variable) arithmetic right shift of long vectors (RShiftVL) when
// UseAVX > 2. No temporary is needed; varshiftq is called without a scratch register.
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  // Spelled like the other long variable-shift rules ("varshift", not "varfshift").
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21293
21294 // --------------------------------- AND --------------------------------------
21295
// Bitwise AND of two vectors without AVX (UseAVX == 0). Two-operand form: dst is
// both the first input and the result.
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;   // first input and result
    const XMMRegister other = $src$$XMMRegister;
    __ pand(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21305
// Bitwise AND of two vector registers with AVX (UseAVX > 0), three-operand form.
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vpand(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21316
// Bitwise AND of a vector register with a vector loaded from memory (AVX only).
// The load is folded into the instruction only when the first input is wider than
// 8 bytes.
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vpand(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21328
21329 // --------------------------------- OR ---------------------------------------
21330
// Bitwise OR of two vectors without AVX (UseAVX == 0). Two-operand form: dst is
// both the first input and the result.
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;   // first input and result
    const XMMRegister other = $src$$XMMRegister;
    __ por(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21340
// Bitwise OR of two vector registers with AVX (UseAVX > 0), three-operand form.
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vpor(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21351
// Bitwise OR of a vector register with a vector loaded from memory (AVX only).
// The load is folded into the instruction only when the first input is wider than
// 8 bytes.
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vpor(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21363
21364 // --------------------------------- XOR --------------------------------------
21365
// Bitwise XOR of two vectors without AVX (UseAVX == 0). Two-operand form: dst is
// both the first input and the result.
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;   // first input and result
    const XMMRegister other = $src$$XMMRegister;
    __ pxor(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21375
// Bitwise XOR of two vector registers with AVX (UseAVX > 0), three-operand form.
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vpxor(result, lhs, rhs, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21386
// Bitwise XOR of a vector register with a vector loaded from memory (AVX only).
// The load is folded into the instruction only when the first input is wider than
// 8 bytes.
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vpxor(result, lhs, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21398
21399 // --------------------------------- VectorCast --------------------------------------
21400
// Cast of byte vectors to any other element type (VectorCastB2X). Selected for all
// targets when AVX512VL is present; without it, for every target except double
// (that case is handled by vcastBtoD).
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    __ vconvert_b2x(target_bt, result, input, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21412
// Cast of byte vectors to double vectors when AVX512VL is not available. Both
// operands use the legVec operand class instead of vec.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const int vec_enc = vector_length_encoding(this);
    __ vconvert_b2x(T_DOUBLE, result, input, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21423
// Narrowing cast short -> byte for sources of at most 8 shorts when the EVEX
// down-convert path is not selected (UseAVX <= 2 or AVX512VL+BW missing).
// Each short is masked with vector_short_to_byte_mask() and the result is packed
// to bytes, all with the 128-bit encoding.
instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const XMMRegister result = $dst$$XMMRegister;
    const int enc128 = Assembler::AVX_128bit;
    // Mask first so that the unsigned-saturating pack does not clamp.
    __ vpand(result, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), enc128, noreg);
    __ vpackuswb(result, result, result, enc128);
  %}
  ins_pipe( pipe_slow );
%}
21438
// Narrowing cast short -> byte for sources of exactly 16 shorts when the EVEX
// down-convert path is not selected (UseAVX <= 2 or AVX512VL+BW missing).
// The source is masked with vector_short_to_byte_mask(), its upper 128 bits are
// extracted into vtmp, and both halves are packed to bytes in dst.
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister upper  = $vtmp$$XMMRegister;
    // Encoding derived from the size of the source vector, not of the result.
    const int src_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));

    __ vpand(result, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), src_enc, noreg);
    __ vextracti128(upper, result, 0x1);
    __ vpackuswb(result, result, upper, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21456
// Cast of short vectors (VectorCastS2X). Selected either when AVX-512 with VL+BW is
// available, or when the result vector is at least as large as the source (the
// widening casts).
//
//   T_BYTE   : evpmovwb using the encoding of the source vector
//   T_INT    : vpmovsxwd
//   T_FLOAT  : vpmovsxwd followed by vcvtdq2ps
//   T_LONG   : vpmovsxwq
//   T_DOUBLE : vpmovsxwd at half the destination encoding, then vcvtdq2pd
instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        // evpmovwb is encoded with the source vector length, so the no-AVX512VL
        // fallback has to widen that encoding (as vcastItoX_evex does). The
        // original widened the destination encoding, which this case never reads.
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        // The intermediate int vector is half as wide as the final double vector.
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
21495
// Narrowing cast int -> byte/short for sources of at most 16 bytes when
// UseAVX <= 2. The ints are masked with the int-to-byte or int-to-short mask,
// packed to shorts, and for a byte target packed once more to bytes.
instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const bool to_byte = (target_bt == T_BYTE);
    assert(to_byte || target_bt == T_SHORT, "%s", type2name(target_bt));

    const int src_enc = vector_length_encoding(this, $src);
    const XMMRegister result = $dst$$XMMRegister;
    AddressLiteral lane_mask = to_byte ? ExternalAddress(vector_int_to_byte_mask())
                                       : ExternalAddress(vector_int_to_short_mask());

    // Mask first so that the unsigned-saturating packs do not clamp.
    __ vpand(result, $src$$XMMRegister, lane_mask, src_enc, noreg);
    __ vpackusdw(result, result, result, src_enc);
    if (to_byte) {
      __ vpackuswb(result, result, result, src_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21520
// Narrowing cast int -> byte/short for 32-byte sources when UseAVX <= 2.
// The masked source is kept in vtmp, its upper 128 bits are extracted into dst,
// both are packed to shorts, and for a byte target packed once more to bytes with
// the 128-bit encoding.
instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const bool to_byte = (target_bt == T_BYTE);
    assert(to_byte || target_bt == T_SHORT, "%s", type2name(target_bt));

    const int src_enc = vector_length_encoding(this, $src);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister masked = $vtmp$$XMMRegister;
    AddressLiteral lane_mask = to_byte ? ExternalAddress(vector_int_to_byte_mask())
                                       : ExternalAddress(vector_int_to_short_mask());

    // Mask first so that the unsigned-saturating packs do not clamp.
    __ vpand(masked, $src$$XMMRegister, lane_mask, src_enc, noreg);
    __ vextracti128(result, masked, 0x1);
    __ vpackusdw(result, masked, result, src_enc);
    if (to_byte) {
      __ vpackuswb(result, result, result, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
21548
// Cast of int vectors (VectorCastI2X). Selected when UseAVX > 2, or when the result
// vector is at least as large as the source.
//
//   T_BYTE / T_SHORT : evpmovdb / evpmovdw using the source encoding, widened to
//                      512 bits when AVX512VL is not available
//   T_FLOAT          : vcvtdq2ps
//   T_LONG           : vpmovsxdq
//   T_DOUBLE         : vcvtdq2pd
instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const int src_enc = vector_length_encoding(this, $src);
    const int dst_enc = vector_length_encoding(this);
    // Encoding for the down-converting moves; only read by the byte/short cases.
    const int narrow_enc = VM_Version::supports_avx512vl() ? src_enc : Assembler::AVX_512bit;

    switch (target_bt) {
      case T_BYTE:
        __ evpmovdb(result, input, narrow_enc);
        break;
      case T_SHORT:
        __ evpmovdw(result, input, narrow_enc);
        break;
      case T_FLOAT:
        __ vcvtdq2ps(result, input, dst_enc);
        break;
      case T_LONG:
        __ vpmovsxdq(result, input, dst_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd(result, input, dst_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
21588
// Narrowing cast long -> byte/short when UseAVX <= 2. The low int of every long is
// gathered into the low 128 bits of dst (one shuffle for sources of at most 16
// bytes, two permutes for 32-byte sources), masked with the int-to-byte or
// int-to-short mask, packed to shorts, and for a byte target packed to bytes.
instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const XMMRegister result = $dst$$XMMRegister;
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral lane_mask = (target_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                     : ExternalAddress(vector_int_to_short_mask());

    // Step 1: collect the low halves of the longs (selector 8 in each shuffle).
    if (src_bytes <= 16) {
      __ vpshufd(result, $src$$XMMRegister, 8, Assembler::AVX_128bit);
    } else {
      assert(src_bytes <= 32, "required");
      __ vpermilps(result, $src$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpermpd(result, result, 8, Assembler::AVX_256bit);
    }

    // Step 2: mask and pack int -> short; identical for both source sizes.
    __ vpand(result, result, lane_mask, Assembler::AVX_128bit, noreg);
    __ vpackusdw(result, result, result, Assembler::AVX_128bit);

    // Step 3: one more pack for a byte target.
    if (target_bt == T_BYTE) {
      __ vpackuswb(result, result, result, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
21618
// Cast of long vectors (VectorCastL2X). Selected when UseAVX > 2, or for int, float
// and double targets on any AVX level.
//
//   T_BYTE / T_SHORT  : evpmovqb / evpmovqw
//   T_INT             : chosen by source size in bytes (8, 16, 32, otherwise 64)
//   T_FLOAT / T_DOUBLE: evcvtqq2ps / evcvtqq2pd, asserted to require AVX512DQ
instruct vcastLtoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT ||
             Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src\t!" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    const int src_enc   = vector_length_encoding(this, $src);
    // Encoding for the EVEX down-converts: forced to 512 bits when AVX-512 is in
    // use but AVX512VL is missing, otherwise the encoding of the source vector.
    const int evex_enc  = (UseAVX > 2 && !VM_Version::supports_avx512vl()) ? Assembler::AVX_512bit
                                                                           : src_enc;
    switch (target_bt) {
      case T_BYTE:
        __ evpmovqb(result, input, evex_enc);
        break;
      case T_SHORT:
        __ evpmovqw(result, input, evex_enc);
        break;
      case T_INT:
        switch (src_bytes) {
          case 8:
            // Single lane: a plain move, elided when both operands share a register.
            if (result != input) {
              __ movflt(result, input);
            }
            break;
          case 16:
            __ pshufd(result, input, 8);
            break;
          case 32:
            if (UseAVX > 2) {
              __ evpmovqd(result, input, evex_enc);
            } else {
              __ vpermilps(result, input, 8, src_enc);
              __ vpermpd(result, result, 8, src_enc);
            }
            break;
          default: // 64 bytes
            __ evpmovqd(result, input, src_enc);
            break;
        }
        break;
      case T_FLOAT:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2ps(result, input, src_enc);
        break;
      case T_DOUBLE:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2pd(result, input, src_enc);
        break;

      default: assert(false, "%s", type2name(target_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
21678
// Cast of float vectors to double vectors (VectorCastF2X with a T_DOUBLE result),
// emitted as one vcvtps2pd using the encoding of the result vector.
instruct vcastFtoD_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2d $dst,$src\t!" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const int dst_enc = vector_length_encoding(this);
    __ vcvtps2pd(result, input, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
21689
21690
// Cast of float vectors to integral vectors with elements of at most 4 bytes, for
// sources smaller than 64 bytes on targets without AVX10.2 and without AVX512VL.
// Delegates to vector_castF2X_avx with four vector temporaries; the flags register
// is killed.
instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const int src_enc = vector_length_encoding(this, $src);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const XMMRegister t3 = $xtmp3$$XMMRegister;
    const XMMRegister t4 = $xtmp4$$XMMRegister;

    // noreg is passed as the scratch register. Since JDK-8292878 no explicit scratch
    // register is needed to form addresses wider than 32 bits for register-indirect
    // addressing: stub constants are part of the code cache, and ReservedCodeCacheSize
    // is currently capped at 2G. Targets may raise that cap, but a code cache larger
    // than 2G looks impractical, and with the cap in place one temporary register
    // allocation is saved, which in the limiting case can avoid spilling in blocks
    // with high register pressure.
    __ vector_castF2X_avx(target_bt, result, input, t1, t2, t3, t4,
                          ExternalAddress(vector_float_signflip()), noreg, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
21716
// Cast of float vectors to integral vectors on targets without AVX10.2 when either
// AVX512VL is available or the source is 64 bytes wide. Uses two vector and two
// mask-register temporaries; the flags register is killed.
// A long target goes through vector_castF2L_evex with the encoding of the result
// and the double sign-flip constant; every other target goes through
// vector_castF2X_evex with the encoding of the source and the float sign-flip
// constant.
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const KRegister   k1 = $ktmp1$$KRegister;
    const KRegister   k2 = $ktmp2$$KRegister;

    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    if (target_bt != T_LONG) {
      const int src_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(target_bt, result, input, t1, t2, k1, k2,
                             ExternalAddress(vector_float_signflip()), noreg, src_enc);
    } else {
      const int dst_enc = vector_length_encoding(this);
      __ vector_castF2L_evex(result, input, t1, t2, k1, k2,
                             ExternalAddress(vector_double_signflip()), noreg, dst_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21740
// Cast of float vectors to integral vectors on AVX10.2 targets, register source.
// The vector encoding is that of the result for a long target and that of the
// source otherwise.
instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    int vec_enc;
    if (target_bt == T_LONG) {
      vec_enc = vector_length_encoding(this);
    } else {
      vec_enc = vector_length_encoding(this, $src);
    }
    __ vector_castF2X_avx10_2(target_bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21753
// Cast of float vectors to integral vectors on AVX10.2 targets with the source
// loaded from memory. With no source register operand, the source encoding is
// computed from the lane count times sizeof(jfloat); a long target uses the
// encoding of the result instead.
instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X (LoadVector src)));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    int vec_enc;
    if (target_bt == T_LONG) {
      vec_enc = vector_length_encoding(this);
    } else {
      vec_enc = vector_length_encoding(lanes * sizeof(jfloat));
    }
    __ vector_castF2X_avx10_2(target_bt, $dst$$XMMRegister, $src$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21767
// Cast of double vectors to float vectors (VectorCastD2X with a T_FLOAT result),
// emitted as one vcvtpd2ps using the encoding of the source vector.
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const int src_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps(result, input, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
21778
// Cast of double vectors to integral vectors for sources smaller than 64 bytes on
// targets without AVX10.2 and without AVX512VL. Delegates to vector_castD2X_avx
// with five vector temporaries and the float sign-flip constant; the flags
// register is killed.
instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const int src_enc = vector_length_encoding(this, $src);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const XMMRegister t3 = $xtmp3$$XMMRegister;
    const XMMRegister t4 = $xtmp4$$XMMRegister;
    const XMMRegister t5 = $xtmp5$$XMMRegister;
    __ vector_castD2X_avx(target_bt, result, input, t1, t2, t3, t4, t5,
                          ExternalAddress(vector_float_signflip()), noreg, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
21796
// Cast of double vectors to integral vectors on targets without AVX10.2 when either
// AVX512VL is available or the source is 64 bytes wide. Delegates to
// vector_castD2X_evex with two vector and two mask-register temporaries; the flags
// register is killed. The sign-flip constant passed along is the double variant
// when AVX512DQ is supported and the float variant otherwise.
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const KRegister   k1 = $ktmp1$$KRegister;
    const KRegister   k2 = $ktmp2$$KRegister;

    const int src_enc = vector_length_encoding(this, $src);
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral flip_const = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip())
                                                                : ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(target_bt, result, input, t1, t2, k1, k2, flip_const, noreg, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
21814
// Cast of double vectors to integral vectors on AVX10.2 targets, register source.
// The encoding of the source vector is used for every target type.
instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const int src_enc = vector_length_encoding(this, $src);
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10_2(target_bt, result, input, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
21827
// Cast of double vectors to integral vectors on AVX10.2 targets with the source
// loaded from memory. With no source register operand, the source encoding is
// computed from the lane count times sizeof(jdouble).
instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X (LoadVector src)));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const int lanes   = Matcher::vector_length(this);
    const int src_enc = vector_length_encoding(lanes * sizeof(jdouble));
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10_2(target_bt, $dst$$XMMRegister, $src$$Address, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
21841
// Unsigned vector casts from byte, short or int sources (VectorUCastB2X,
// VectorUCastS2X, VectorUCastI2X). Source and target element types are read from
// the node and handed to vector_unsigned_cast with the encoding of the result.
instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    const BasicType source_bt = Matcher::vector_element_basic_type(this, $src);
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const int dst_enc = vector_length_encoding(this);
    __ vector_unsigned_cast(result, input, dst_enc, source_bt, target_bt);
  %}
  ins_pipe( pipe_slow );
%}
21857
// RoundVF (float vector rounded to an int vector) for vectors smaller than 64 bytes
// on targets without AVX512VL. Delegates to vector_round_float_avx with one
// pointer-register and four vector temporaries; the flags register is killed.
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    // MXCSR image placed in the constant table; the value depends on
    // EnableX86ECoreOpts. This statement is kept in its original form.
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));

    const int vec_enc = vector_length_encoding(this);
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister input   = $src$$XMMRegister;
    const Register    gpr_tmp = $tmp$$Register;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const XMMRegister t3 = $xtmp3$$XMMRegister;
    const XMMRegister t4 = $xtmp4$$XMMRegister;
    __ vector_round_float_avx(result, input,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vec_enc,
                              gpr_tmp, t1, t2, t3, t4);
  %}
  ins_pipe( pipe_slow );
%}
21874
21875 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21876 predicate((VM_Version::supports_avx512vl() ||
21877 Matcher::vector_length_in_bytes(n) == 64) &&
21878 Matcher::vector_element_basic_type(n) == T_INT);
21879 match(Set dst (RoundVF src));
21880 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21881 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
21882 ins_encode %{
21883 int vlen_enc = vector_length_encoding(this);
21884 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21885 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
21886 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
21887 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
21888 %}
21889 ins_pipe( pipe_slow );
21890 %}
21891
21892 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21893 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
21894 match(Set dst (RoundVD src));
21895 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21896 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
21897 ins_encode %{
21898 int vlen_enc = vector_length_encoding(this);
21899 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21900 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
21901 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
21902 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
21903 %}
21904 ins_pipe( pipe_slow );
21905 %}
21906
21907 // --------------------------------- VectorMaskCmp --------------------------------------
21908
21909 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
21910 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21911 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
21912 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21913 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21914 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21915 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
21916 ins_encode %{
21917 int vlen_enc = vector_length_encoding(this, $src1);
21918 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21919 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
21920 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21921 } else {
21922 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21923 }
21924 %}
21925 ins_pipe( pipe_slow );
21926 %}
21927
21928 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
21929 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
21930 n->bottom_type()->isa_pvectmask() == nullptr &&
21931 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21932 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21933 effect(TEMP ktmp);
21934 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
21935 ins_encode %{
21936 int vlen_enc = Assembler::AVX_512bit;
21937 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21938 KRegister mask = k0; // The comparison itself is not being masked.
21939 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
21940 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21941 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
21942 } else {
21943 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21944 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
21945 }
21946 %}
21947 ins_pipe( pipe_slow );
21948 %}
21949
21950 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
21951 predicate(n->bottom_type()->isa_pvectmask() &&
21952 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21953 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21954 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
21955 ins_encode %{
21956 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
21957 int vlen_enc = vector_length_encoding(this, $src1);
21958 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21959 KRegister mask = k0; // The comparison itself is not being masked.
21960 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
21961 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21962 } else {
21963 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21964 }
21965 %}
21966 ins_pipe( pipe_slow );
21967 %}
21968
21969 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
21970 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21971 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
21972 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
21973 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21974 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
21975 (n->in(2)->get_int() == BoolTest::eq ||
21976 n->in(2)->get_int() == BoolTest::lt ||
21977 n->in(2)->get_int() == BoolTest::gt)); // cond
21978 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21979 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
21980 ins_encode %{
21981 int vlen_enc = vector_length_encoding(this, $src1);
21982 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
21983 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
21984 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
21985 %}
21986 ins_pipe( pipe_slow );
21987 %}
21988
21989 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
21990 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21991 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
21992 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
21993 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21994 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
21995 (n->in(2)->get_int() == BoolTest::ne ||
21996 n->in(2)->get_int() == BoolTest::le ||
21997 n->in(2)->get_int() == BoolTest::ge)); // cond
21998 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21999 effect(TEMP dst, TEMP xtmp);
22000 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22001 ins_encode %{
22002 int vlen_enc = vector_length_encoding(this, $src1);
22003 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22004 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22005 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22006 %}
22007 ins_pipe( pipe_slow );
22008 %}
22009
22010 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22011 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22012 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22013 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22014 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22015 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22016 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22017 effect(TEMP dst, TEMP xtmp);
22018 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22019 ins_encode %{
22020 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22021 int vlen_enc = vector_length_encoding(this, $src1);
22022 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22023 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22024
22025 if (vlen_enc == Assembler::AVX_128bit) {
22026 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22027 } else {
22028 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22029 }
22030 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22031 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22032 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22033 %}
22034 ins_pipe( pipe_slow );
22035 %}
22036
22037 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22038 predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
22039 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22040 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22041 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22042 effect(TEMP ktmp);
22043 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22044 ins_encode %{
22045 assert(UseAVX > 2, "required");
22046
22047 int vlen_enc = vector_length_encoding(this, $src1);
22048 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22049 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22050 KRegister mask = k0; // The comparison itself is not being masked.
22051 bool merge = false;
22052 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22053
22054 switch (src1_elem_bt) {
22055 case T_INT: {
22056 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22057 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22058 break;
22059 }
22060 case T_LONG: {
22061 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22062 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22063 break;
22064 }
22065 default: assert(false, "%s", type2name(src1_elem_bt));
22066 }
22067 %}
22068 ins_pipe( pipe_slow );
22069 %}
22070
22071
22072 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22073 predicate(n->bottom_type()->isa_pvectmask() &&
22074 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22075 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22076 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22077 ins_encode %{
22078 assert(UseAVX > 2, "required");
22079 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22080
22081 int vlen_enc = vector_length_encoding(this, $src1);
22082 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22083 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22084 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22085
22086 // Comparison i
22087 switch (src1_elem_bt) {
22088 case T_BYTE: {
22089 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22090 break;
22091 }
22092 case T_SHORT: {
22093 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22094 break;
22095 }
22096 case T_INT: {
22097 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22098 break;
22099 }
22100 case T_LONG: {
22101 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22102 break;
22103 }
22104 default: assert(false, "%s", type2name(src1_elem_bt));
22105 }
22106 %}
22107 ins_pipe( pipe_slow );
22108 %}
22109
22110 // Extract
22111
22112 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22113 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22114 match(Set dst (ExtractI src idx));
22115 match(Set dst (ExtractS src idx));
22116 match(Set dst (ExtractB src idx));
22117 format %{ "extractI $dst,$src,$idx\t!" %}
22118 ins_encode %{
22119 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22120
22121 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22122 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22123 %}
22124 ins_pipe( pipe_slow );
22125 %}
22126
22127 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22128 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22129 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22130 match(Set dst (ExtractI src idx));
22131 match(Set dst (ExtractS src idx));
22132 match(Set dst (ExtractB src idx));
22133 effect(TEMP vtmp);
22134 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22135 ins_encode %{
22136 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22137
22138 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22139 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22140 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22141 %}
22142 ins_pipe( pipe_slow );
22143 %}
22144
22145 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22146 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22147 match(Set dst (ExtractL src idx));
22148 format %{ "extractL $dst,$src,$idx\t!" %}
22149 ins_encode %{
22150 assert(UseSSE >= 4, "required");
22151 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22152
22153 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22154 %}
22155 ins_pipe( pipe_slow );
22156 %}
22157
22158 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22159 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22160 Matcher::vector_length(n->in(1)) == 8); // src
22161 match(Set dst (ExtractL src idx));
22162 effect(TEMP vtmp);
22163 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22164 ins_encode %{
22165 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22166
22167 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22168 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22169 %}
22170 ins_pipe( pipe_slow );
22171 %}
22172
22173 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22174 predicate(Matcher::vector_length(n->in(1)) <= 4);
22175 match(Set dst (ExtractF src idx));
22176 effect(TEMP dst, TEMP vtmp);
22177 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22178 ins_encode %{
22179 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22180
22181 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22182 %}
22183 ins_pipe( pipe_slow );
22184 %}
22185
22186 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22187 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22188 Matcher::vector_length(n->in(1)/*src*/) == 16);
22189 match(Set dst (ExtractF src idx));
22190 effect(TEMP vtmp);
22191 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22192 ins_encode %{
22193 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22194
22195 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22196 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22197 %}
22198 ins_pipe( pipe_slow );
22199 %}
22200
22201 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22202 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22203 match(Set dst (ExtractD src idx));
22204 format %{ "extractD $dst,$src,$idx\t!" %}
22205 ins_encode %{
22206 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22207
22208 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22209 %}
22210 ins_pipe( pipe_slow );
22211 %}
22212
22213 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22214 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22215 Matcher::vector_length(n->in(1)) == 8); // src
22216 match(Set dst (ExtractD src idx));
22217 effect(TEMP vtmp);
22218 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22219 ins_encode %{
22220 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22221
22222 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22223 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22224 %}
22225 ins_pipe( pipe_slow );
22226 %}
22227
22228 // --------------------------------- Vector Blend --------------------------------------
22229
22230 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22231 predicate(UseAVX == 0);
22232 match(Set dst (VectorBlend (Binary dst src) mask));
22233 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22234 effect(TEMP tmp);
22235 ins_encode %{
22236 assert(UseSSE >= 4, "required");
22237
22238 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22239 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22240 }
22241 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22242 %}
22243 ins_pipe( pipe_slow );
22244 %}
22245
22246 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22247 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22248 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22249 Matcher::vector_length_in_bytes(n) <= 32 &&
22250 is_integral_type(Matcher::vector_element_basic_type(n)));
22251 match(Set dst (VectorBlend (Binary src1 src2) mask));
22252 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22253 ins_encode %{
22254 int vlen_enc = vector_length_encoding(this);
22255 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22256 %}
22257 ins_pipe( pipe_slow );
22258 %}
22259
22260 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22261 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22262 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22263 Matcher::vector_length_in_bytes(n) <= 32 &&
22264 !is_integral_type(Matcher::vector_element_basic_type(n)));
22265 match(Set dst (VectorBlend (Binary src1 src2) mask));
22266 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22267 ins_encode %{
22268 int vlen_enc = vector_length_encoding(this);
22269 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22270 %}
22271 ins_pipe( pipe_slow );
22272 %}
22273
22274 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22275 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22276 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22277 Matcher::vector_length_in_bytes(n) <= 32);
22278 match(Set dst (VectorBlend (Binary src1 src2) mask));
22279 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22280 effect(TEMP vtmp, TEMP dst);
22281 ins_encode %{
22282 int vlen_enc = vector_length_encoding(this);
22283 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22284 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22285 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22286 %}
22287 ins_pipe( pipe_slow );
22288 %}
22289
22290 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22291 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22292 n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
22293 match(Set dst (VectorBlend (Binary src1 src2) mask));
22294 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22295 effect(TEMP ktmp);
22296 ins_encode %{
22297 int vlen_enc = Assembler::AVX_512bit;
22298 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22299 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22300 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22301 %}
22302 ins_pipe( pipe_slow );
22303 %}
22304
22305
22306 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22307 predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
22308 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22309 VM_Version::supports_avx512bw()));
22310 match(Set dst (VectorBlend (Binary src1 src2) mask));
22311 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22312 ins_encode %{
22313 int vlen_enc = vector_length_encoding(this);
22314 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22315 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22316 %}
22317 ins_pipe( pipe_slow );
22318 %}
22319
22320 // --------------------------------- ABS --------------------------------------
22321 // a = |a|
22322 instruct vabsB_reg(vec dst, vec src) %{
22323 match(Set dst (AbsVB src));
22324 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22325 ins_encode %{
22326 uint vlen = Matcher::vector_length(this);
22327 if (vlen <= 16) {
22328 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22329 } else {
22330 int vlen_enc = vector_length_encoding(this);
22331 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22332 }
22333 %}
22334 ins_pipe( pipe_slow );
22335 %}
22336
22337 instruct vabsS_reg(vec dst, vec src) %{
22338 match(Set dst (AbsVS src));
22339 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22340 ins_encode %{
22341 uint vlen = Matcher::vector_length(this);
22342 if (vlen <= 8) {
22343 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22344 } else {
22345 int vlen_enc = vector_length_encoding(this);
22346 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22347 }
22348 %}
22349 ins_pipe( pipe_slow );
22350 %}
22351
22352 instruct vabsI_reg(vec dst, vec src) %{
22353 match(Set dst (AbsVI src));
22354 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22355 ins_encode %{
22356 uint vlen = Matcher::vector_length(this);
22357 if (vlen <= 4) {
22358 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22359 } else {
22360 int vlen_enc = vector_length_encoding(this);
22361 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22362 }
22363 %}
22364 ins_pipe( pipe_slow );
22365 %}
22366
22367 instruct vabsL_reg(vec dst, vec src) %{
22368 match(Set dst (AbsVL src));
22369 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22370 ins_encode %{
22371 assert(UseAVX > 2, "required");
22372 int vlen_enc = vector_length_encoding(this);
22373 if (!VM_Version::supports_avx512vl()) {
22374 vlen_enc = Assembler::AVX_512bit;
22375 }
22376 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22377 %}
22378 ins_pipe( pipe_slow );
22379 %}
22380
22381 // --------------------------------- ABSNEG --------------------------------------
22382
22383 instruct vabsnegF(vec dst, vec src) %{
22384 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22385 match(Set dst (AbsVF src));
22386 match(Set dst (NegVF src));
22387 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22388 ins_cost(150);
22389 ins_encode %{
22390 int opcode = this->ideal_Opcode();
22391 int vlen = Matcher::vector_length(this);
22392 if (vlen == 2) {
22393 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22394 } else {
22395 assert(vlen == 8 || vlen == 16, "required");
22396 int vlen_enc = vector_length_encoding(this);
22397 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22398 }
22399 %}
22400 ins_pipe( pipe_slow );
22401 %}
22402
22403 instruct vabsneg4F(vec dst) %{
22404 predicate(Matcher::vector_length(n) == 4);
22405 match(Set dst (AbsVF dst));
22406 match(Set dst (NegVF dst));
22407 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22408 ins_cost(150);
22409 ins_encode %{
22410 int opcode = this->ideal_Opcode();
22411 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22412 %}
22413 ins_pipe( pipe_slow );
22414 %}
22415
22416 instruct vabsnegD(vec dst, vec src) %{
22417 match(Set dst (AbsVD src));
22418 match(Set dst (NegVD src));
22419 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22420 ins_encode %{
22421 int opcode = this->ideal_Opcode();
22422 uint vlen = Matcher::vector_length(this);
22423 if (vlen == 2) {
22424 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22425 } else {
22426 int vlen_enc = vector_length_encoding(this);
22427 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22428 }
22429 %}
22430 ins_pipe( pipe_slow );
22431 %}
22432
22433 //------------------------------------- VectorTest --------------------------------------------
22434
22435 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22436 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22437 match(Set cr (VectorTest src1 src2));
22438 effect(TEMP vtmp);
22439 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22440 ins_encode %{
22441 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22442 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22443 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22444 %}
22445 ins_pipe( pipe_slow );
22446 %}
22447
22448 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22449 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22450 match(Set cr (VectorTest src1 src2));
22451 format %{ "vptest_ge16 $src1, $src2\n\t" %}
22452 ins_encode %{
22453 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22454 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22455 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22456 %}
22457 ins_pipe( pipe_slow );
22458 %}
22459
22460 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22461 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22462 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22463 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22464 match(Set cr (VectorTest src1 src2));
22465 effect(TEMP tmp);
22466 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22467 ins_encode %{
22468 uint masklen = Matcher::vector_length(this, $src1);
22469 __ kmovwl($tmp$$Register, $src1$$KRegister);
22470 __ andl($tmp$$Register, (1 << masklen) - 1);
22471 __ cmpl($tmp$$Register, (1 << masklen) - 1);
22472 %}
22473 ins_pipe( pipe_slow );
22474 %}
22475
22476 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22477 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22478 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22479 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22480 match(Set cr (VectorTest src1 src2));
22481 effect(TEMP tmp);
22482 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22483 ins_encode %{
22484 uint masklen = Matcher::vector_length(this, $src1);
22485 __ kmovwl($tmp$$Register, $src1$$KRegister);
22486 __ andl($tmp$$Register, (1 << masklen) - 1);
22487 %}
22488 ins_pipe( pipe_slow );
22489 %}
22490
22491 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22492 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22493 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22494 match(Set cr (VectorTest src1 src2));
22495 format %{ "ktest_ge8 $src1, $src2\n\t" %}
22496 ins_encode %{
22497 uint masklen = Matcher::vector_length(this, $src1);
22498 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22499 %}
22500 ins_pipe( pipe_slow );
22501 %}
22502
22503 //------------------------------------- LoadMask --------------------------------------------
22504
22505 instruct loadMask(legVec dst, legVec src) %{
22506 predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22507 match(Set dst (VectorLoadMask src));
22508 effect(TEMP dst);
22509 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22510 ins_encode %{
22511 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22512 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22513 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22514 %}
22515 ins_pipe( pipe_slow );
22516 %}
22517
22518 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22519 predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
22520 match(Set dst (VectorLoadMask src));
22521 effect(TEMP xtmp);
22522 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22523 ins_encode %{
22524 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22525 true, Assembler::AVX_512bit);
22526 %}
22527 ins_pipe( pipe_slow );
22528 %}
22529
22530 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
22531 predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
22532 match(Set dst (VectorLoadMask src));
22533 effect(TEMP xtmp);
22534 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22535 ins_encode %{
22536 int vlen_enc = vector_length_encoding(in(1));
22537 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22538 false, vlen_enc);
22539 %}
22540 ins_pipe( pipe_slow );
22541 %}
22542
22543 //------------------------------------- StoreMask --------------------------------------------
22544
22545 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22546 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
22547 match(Set dst (VectorStoreMask src size));
22548 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22549 ins_encode %{
22550 int vlen = Matcher::vector_length(this);
22551 if (vlen <= 16 && UseAVX <= 2) {
22552 assert(UseSSE >= 3, "required");
22553 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22554 } else {
22555 assert(UseAVX > 0, "required");
22556 int src_vlen_enc = vector_length_encoding(this, $src);
22557 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22558 }
22559 %}
22560 ins_pipe( pipe_slow );
22561 %}
22562
// VectorStoreMask for 2-byte elements (at most 16 lanes) with the mask input in
// a vector register. Up to 8 lanes use the SSE sequence; otherwise the upper
// 128 bits of src are extracted and packed together with the lower half.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const int enc128 = Assembler::AVX_128bit;
    const int lane_count = Matcher::vector_length(this);
    if (lane_count > 8) {
      assert(UseAVX > 0, "required");
      // dst <- upper 128-bit lane of src, then pack low/high words down to bytes.
      __ vextracti128(dst_reg, src_reg, 0x1);
      __ vpacksswb(dst_reg, src_reg, dst_reg, enc128);
      __ vpabsb(dst_reg, dst_reg, enc128);
    } else {
      assert(UseSSE >= 3, "required");
      const XMMRegister zero_reg = $xtmp$$XMMRegister;
      // xtmp is cleared and supplies the second pack operand.
      __ pxor(zero_reg, zero_reg);
      __ pabsw(dst_reg, src_reg);
      __ packuswb(dst_reg, zero_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22585
// VectorStoreMask for 4-byte elements (at most 8 lanes) without AVX-512, with
// the mask input in a vector register. Dwords are narrowed to bytes by two
// successive pack steps; xtmp is zeroed and used as the filler operand.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    const XMMRegister dst_reg  = $dst$$XMMRegister;
    const XMMRegister src_reg  = $src$$XMMRegister;
    const XMMRegister zero_reg = $xtmp$$XMMRegister;
    const int enc128 = Assembler::AVX_128bit;
    const int lane_count = Matcher::vector_length(this);
    if (lane_count > 4) {
      assert(UseAVX > 0, "required");
      __ vpxor(zero_reg, zero_reg, zero_reg, enc128);
      // dst <- upper 128-bit lane of src, then dword->word and word->byte packs.
      __ vextracti128(dst_reg, src_reg, 0x1);
      __ vpackssdw(dst_reg, src_reg, dst_reg, enc128);
      __ vpacksswb(dst_reg, dst_reg, zero_reg, enc128);
      __ vpabsb(dst_reg, dst_reg, enc128);
    } else {
      assert(UseSSE >= 3, "required");
      __ pxor(zero_reg, zero_reg);
      __ pabsd(dst_reg, src_reg);
      __ packusdw(dst_reg, zero_reg);
      __ packuswb(dst_reg, zero_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22611
// VectorStoreMask for 8-byte elements, exactly 2 lanes, without AVX-512.
// The SSE sequence shuffles the dwords of src into dst and then narrows them
// to bytes, using the zeroed xtmp as the second pack operand.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    const XMMRegister dst_reg  = $dst$$XMMRegister;
    const XMMRegister src_reg  = $src$$XMMRegister;
    const XMMRegister zero_reg = $xtmp$$XMMRegister;
    __ pxor(zero_reg, zero_reg);
    // Shuffle control 0x8 places source dwords 0 and 2 in the two low dword slots.
    __ pshufd(dst_reg, src_reg, 0x8);
    __ pabsd(dst_reg, dst_reg);
    __ packusdw(dst_reg, zero_reg);
    __ packuswb(dst_reg, zero_reg);
  %}
  ins_pipe( pipe_slow );
%}
22627
// VectorStoreMask for 8-byte elements, exactly 4 lanes, without AVX-512.
// A 256-bit shuffle gathers dwords, the upper 128-bit lane is extracted into
// vtmp and blended into dst, then the result is narrowed to bytes.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister tmp_reg = $vtmp$$XMMRegister;
    const int enc128 = Assembler::AVX_128bit;
    const int enc256 = Assembler::AVX_256bit;
    // Only the initial shuffle works on the full 256 bits; the rest is 128-bit.
    __ vshufps(dst_reg, src_reg, src_reg, 0x88, enc256);
    __ vextracti128(tmp_reg, dst_reg, 0x1);
    __ vblendps(dst_reg, dst_reg, tmp_reg, 0xC, enc128);
    // vtmp is reused as an all-zero operand for the two packs.
    __ vpxor(tmp_reg, tmp_reg, tmp_reg, enc128);
    __ vpackssdw(dst_reg, dst_reg, tmp_reg, enc128);
    __ vpacksswb(dst_reg, dst_reg, tmp_reg, enc128);
    __ vpabsb(dst_reg, dst_reg, enc128);
  %}
  ins_pipe( pipe_slow );
%}
22645
// VectorStoreMask for 4-byte elements with AVX-512 in use and the mask input
// held in a vector register. Without AVX512VL the down-convert is issued with
// the 512-bit encoding regardless of the source vector size.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int result_enc      = vector_length_encoding(this);
    const int convert_enc     = VM_Version::supports_avx512vl() ? natural_src_enc
                                                                : static_cast<int>(Assembler::AVX_512bit);
    __ evpmovdb(dst_reg, src_reg, convert_enc);
    __ vpabsb(dst_reg, dst_reg, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22661
// VectorStoreMask for 8-byte elements with AVX-512 in use and the mask input
// held in a vector register. Without AVX512VL the down-convert is issued with
// the 512-bit encoding regardless of the source vector size.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int result_enc      = vector_length_encoding(this);
    const int convert_enc     = VM_Version::supports_avx512vl() ? natural_src_enc
                                                                : static_cast<int>(Assembler::AVX_512bit);
    __ evpmovqb(dst_reg, src_reg, convert_enc);
    __ vpabsb(dst_reg, dst_reg, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22677
// VectorStoreMask from an opmask register when AVX512VL+BW are not both
// available. The mask's vector is asserted to be 64 bytes; a masked 512-bit
// load of the vector_int_mask_cmp_bits constant is narrowed from dwords to bytes.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    const XMMRegister dst_reg  = $dst$$XMMRegister;
    const KRegister   mask_reg = $mask$$KRegister;
    const int enc512 = Assembler::AVX_512bit;
    __ evmovdqul(dst_reg, mask_reg, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, enc512, noreg);
    __ evpmovdb(dst_reg, dst_reg, enc512);
  %}
  ins_pipe( pipe_slow );
%}
22691
// VectorStoreMask from an opmask register when AVX512VL+BW are available:
// the mask is expanded to a byte vector and then a byte absolute value is taken.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    const XMMRegister dst_reg  = $dst$$XMMRegister;
    const KRegister   mask_reg = $mask$$KRegister;
    const int result_enc = vector_length_encoding(this);
    __ evpmovm2b(dst_reg, mask_reg, result_enc);
    __ vpabsb(dst_reg, dst_reg, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22704
// VectorMaskCast on an opmask register operand. dst is both the input and the
// result, the cost is zero and no machine code is emitted.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // Intentionally emits nothing.
  %}
  ins_pipe(empty);
%}
22714
// VectorMaskCast on a vector register when the result and the input have the
// same size in bytes. dst is both the input and the result, the cost is zero
// and no machine code is emitted.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // Intentionally emits nothing.
  %}
  ins_pipe(empty);
%}
22725
// VectorMaskCast on vector registers when the result and the input differ in
// size in bytes. The conversion is delegated to the vector_mask_cast helper,
// which receives the destination and source element types and the lane count.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    const BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
    const BasicType to_bt   = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_bt, from_bt, lane_count);
  %}
  ins_pipe(pipe_slow);
%}
22738
22739 //-------------------------------- Load Iota Indices ----------------------------------
22740
// VectorLoadConst with a zero immediate: loads the iota index constant for the
// node's vector size and element type via load_iota_indices.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, size_in_bytes, elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
22751
// PopulateIndex with an int start value (src1) and a constant step of 1 (src2):
// broadcasts src1 into vtmp, loads the iota indices into dst, and adds the two.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const XMMRegister dst_reg   = $dst$$XMMRegister;
    const XMMRegister bcast_reg = $vtmp$$XMMRegister;
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const int enc           = vector_length_encoding(this);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(lane_bt, bcast_reg, $src1$$Register, enc);
    __ load_iota_indices(dst_reg, size_in_bytes, lane_bt);
    __ vpadd(lane_bt, dst_reg, dst_reg, bcast_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
22767
// PopulateIndex with a long start value (src1) and a constant step of 1 (src2):
// broadcasts src1 into vtmp, loads the iota indices into dst, and adds the two.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const XMMRegister dst_reg   = $dst$$XMMRegister;
    const XMMRegister bcast_reg = $vtmp$$XMMRegister;
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const int enc           = vector_length_encoding(this);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(lane_bt, bcast_reg, $src1$$Register, enc);
    __ load_iota_indices(dst_reg, size_in_bytes, lane_bt);
    __ vpadd(lane_bt, dst_reg, dst_reg, bcast_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
22783
22784 //-------------------------------- Rearrange ----------------------------------
22785
22786 // LoadShuffle/Rearrange for Byte
// VectorRearrange of byte elements for vectors with fewer than 32 lanes.
// dst is shuffled in place by a single pshufb.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister data_reg = $dst$$XMMRegister;
    const XMMRegister ctrl_reg = $shuffle$$XMMRegister;
    __ pshufb(data_reg, ctrl_reg);
  %}
  ins_pipe( pipe_slow );
%}
22798
// VectorRearrange of byte elements for exactly 32 lanes when AVX512_VBMI is
// not available. Both an in-lane and a lane-swapped shuffle are computed and
// then blended according to a mask derived from the shuffle indices.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const XMMRegister dst_reg     = $dst$$XMMRegister;
    const XMMRegister src_reg     = $src$$XMMRegister;
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    const XMMRegister swapped_reg = $vtmp1$$XMMRegister;
    const XMMRegister blend_reg   = $vtmp2$$XMMRegister;
    const int enc256 = Assembler::AVX_256bit;

    // swapped_reg <- src with its two 128-bit lanes exchanged
    __ vperm2i128(swapped_reg, src_reg, src_reg, 1);
    // Entries that come from the opposite 128-bit lane
    __ vpshufb(swapped_reg, swapped_reg, shuffle_reg, enc256);
    // Entries that come from the same 128-bit lane
    __ vpshufb(dst_reg, src_reg, shuffle_reg, enc256);
    // Blend mask: high bit set for entries that must come from the opposite lane
    __ vpaddb(blend_reg, shuffle_reg, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // Select between the two shuffles
    __ vpblendvb(dst_reg, dst_reg, swapped_reg, blend_reg, enc256);
  %}
  ins_pipe( pipe_slow );
%}
22820
22821
// VectorRearrange of byte elements for more than 32 lanes when AVX512_VBMI is
// not available. The work is delegated to the rearrange_bytes helper, which
// is given three vector temps, one general-purpose temp and one opmask temp.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    const XMMRegister dst_reg     = $dst$$XMMRegister;
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    const XMMRegister src_reg     = $src$$XMMRegister;
    const XMMRegister vtemp_a     = $xtmp1$$XMMRegister;
    const XMMRegister vtemp_b     = $xtmp2$$XMMRegister;
    const XMMRegister vtemp_c     = $xtmp3$$XMMRegister;
    const Register    gp_temp     = $rtmp$$Register;
    const KRegister   k_temp      = $ktmp$$KRegister;
    const int enc = vector_length_encoding(this);
    __ rearrange_bytes(dst_reg, shuffle_reg, src_reg,
                       vtemp_a, vtemp_b, vtemp_c,
                       gp_temp, k_temp, enc);
  %}
  ins_pipe( pipe_slow );
%}
22836
// VectorRearrange of byte elements for 32 or more lanes when AVX512_VBMI is
// available: a single vpermb with the shuffle as the index operand.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const XMMRegister dst_reg   = $dst$$XMMRegister;
    const XMMRegister index_reg = $shuffle$$XMMRegister;
    const XMMRegister data_reg  = $src$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ vpermb(dst_reg, index_reg, data_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
22848
22849 // LoadShuffle/Rearrange for Short
22850
// VectorLoadShuffle for short elements when AVX512BW is not available.
// Only a byte shuffle instruction exists on these platforms, so each short
// index is turned into a pair of byte indices: the index is doubled, copied
// into both bytes of the word, and the vector_short_shufflemask constant is
// added to form the alternate byte index.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister tmp_reg = $vtmp$$XMMRegister;
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX != 0) {
      assert(UseAVX > 1 || size_in_bytes <= 16, "required");
      const int enc = vector_length_encoding(this);
      // tmp <- 2 * index (byte offset of each short)
      __ vpsllw(tmp_reg, src_reg, 1, enc);

      // dst <- byte offset replicated into both bytes of each word
      __ vpsllw(dst_reg, tmp_reg, 8, enc);
      __ vpor(dst_reg, dst_reg, tmp_reg, enc);

      // Add the per-byte constant to obtain the alternate byte index
      __ vpaddb(dst_reg, dst_reg, ExternalAddress(vector_short_shufflemask()), enc, noreg);
    } else {
      assert(size_in_bytes <= 16, "required");
      // tmp <- 2 * index (byte offset of each short)
      __ movdqu(tmp_reg, src_reg);
      __ psllw(tmp_reg, 1);

      // dst <- byte offset replicated into both bytes of each word
      __ movdqu(dst_reg, tmp_reg);
      __ psllw(dst_reg, 8);
      __ por(dst_reg, tmp_reg);

      // Add the per-byte constant to obtain the alternate byte index
      __ movdqu(tmp_reg, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb(dst_reg, tmp_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22891
// VectorRearrange of short elements for at most 8 lanes when AVX512BW is not
// available. dst is shuffled in place by a single pshufb.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister data_reg = $dst$$XMMRegister;
    const XMMRegister ctrl_reg = $shuffle$$XMMRegister;
    __ pshufb(data_reg, ctrl_reg);
  %}
  ins_pipe( pipe_slow );
%}
22903
// VectorRearrange of short elements for exactly 16 lanes when AVX512BW is not
// available. Both an in-lane and a lane-swapped byte shuffle are computed and
// then blended according to a mask derived from the shuffle indices.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const XMMRegister dst_reg     = $dst$$XMMRegister;
    const XMMRegister src_reg     = $src$$XMMRegister;
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    const XMMRegister swapped_reg = $vtmp1$$XMMRegister;
    const XMMRegister blend_reg   = $vtmp2$$XMMRegister;
    const int enc256 = Assembler::AVX_256bit;

    // swapped_reg <- src with its two 128-bit lanes exchanged
    __ vperm2i128(swapped_reg, src_reg, src_reg, 1);
    // Entries that come from the opposite 128-bit lane
    __ vpshufb(swapped_reg, swapped_reg, shuffle_reg, enc256);
    // Entries that come from the same 128-bit lane
    __ vpshufb(dst_reg, src_reg, shuffle_reg, enc256);
    // Blend mask: high bit set for entries that must come from the opposite lane
    __ vpaddb(blend_reg, shuffle_reg, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // Select between the two shuffles
    __ vpblendvb(dst_reg, dst_reg, swapped_reg, blend_reg, enc256);
  %}
  ins_pipe( pipe_slow );
%}
22925
// VectorRearrange of short elements when AVX512BW is available: a single
// vpermw. Without AVX512VL the 512-bit encoding is used regardless of the
// node's vector size.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int natural_enc = vector_length_encoding(this);
    const int perm_enc    = VM_Version::supports_avx512vl() ? natural_enc
                                                            : static_cast<int>(Assembler::AVX_512bit);
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, perm_enc);
  %}
  ins_pipe( pipe_slow );
%}
22940
22941 // LoadShuffle/Rearrange for Integer and Float
22942
// VectorLoadShuffle for 4-lane int/float vectors when AVX is disabled.
// Only a byte shuffle instruction exists on these platforms, so each int
// index is expanded into four byte indices: the index is duplicated across
// the dword, multiplied by 4, and the vector_int_shufflemask constant is added.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister tmp_reg = $vtmp$$XMMRegister;

    // tmp <- each index copied into both words of its dword, then scaled by 4
    __ movdqu(tmp_reg, src_reg);
    __ pshuflw(tmp_reg, tmp_reg, 0xA0);
    __ pshufhw(tmp_reg, tmp_reg, 0xA0);
    __ psllw(tmp_reg, 2);

    // tmp <- scaled index replicated into all four bytes of the dword
    __ movdqu(dst_reg, tmp_reg);
    __ psllw(dst_reg, 8);
    __ por(tmp_reg, dst_reg);

    // dst <- per-byte constant (3,2,1,0) + replicated byte index
    __ movdqu(dst_reg, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb(dst_reg, tmp_reg);
  %}
  ins_pipe( pipe_slow );
%}
22972
// VectorRearrange of int/float elements when AVX is disabled.
// dst is shuffled in place by a single pshufb.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister data_reg = $dst$$XMMRegister;
    const XMMRegister ctrl_reg = $shuffle$$XMMRegister;
    __ pshufb(data_reg, ctrl_reg);
  %}
  ins_pipe( pipe_slow );
%}
22984
// VectorRearrange of int/float elements when AVX is enabled. The instruction
// selection is delegated to the vector_rearrange_int_float helper.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    __ vector_rearrange_int_float(elem_bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22997
22998 // LoadShuffle/Rearrange for Long and Double
22999
// VectorLoadShuffle for long/double vectors with fewer than 8 lanes when
// AVX512VL is not available. Only a double word shuffle exists on these
// platforms, so each long index is turned into a pair of dword indices:
// the index is doubled, copied into both dwords of the qword, and the
// vector_long_shufflemask constant is added.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister tmp_reg = $vtmp$$XMMRegister;
    const int enc = vector_length_encoding(this);

    // tmp <- 2 * index (dword position of each long)
    __ vpsllq(tmp_reg, src_reg, 1, enc);

    // dst <- dword position replicated into both halves of each qword
    __ vpsllq(dst_reg, tmp_reg, 32, enc);
    __ vpor(dst_reg, dst_reg, tmp_reg, enc);

    // Add the per-dword constant to obtain the alternate dword index
    __ vpaddd(dst_reg, dst_reg, ExternalAddress(vector_long_shufflemask()), enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23025
// VectorRearrange of long/double elements for fewer than 8 lanes when
// AVX512VL is not available: a single vpermd, with the shuffle as the index
// operand.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const XMMRegister dst_reg   = $dst$$XMMRegister;
    const XMMRegister index_reg = $shuffle$$XMMRegister;
    const XMMRegister data_reg  = $src$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ vpermd(dst_reg, index_reg, data_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
23039
// VectorRearrange of long/double elements for 8 lanes, or for any lane count
// when AVX512VL is available: a single vpermq. A 128-bit vector is widened to
// the 256-bit encoding before the instruction is emitted.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int natural_enc = vector_length_encoding(this);
    // Never emit vpermq with the 128-bit encoding; use 256-bit instead.
    const int perm_enc = (natural_enc == Assembler::AVX_128bit) ? static_cast<int>(Assembler::AVX_256bit)
                                                                 : natural_enc;
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, perm_enc);
  %}
  ins_pipe( pipe_slow );
%}
23056
23057 // --------------------------------- FMA --------------------------------------
23058 // a * b + c
23059
// Packed float fused multiply-add with all operands in registers:
// c = a * b + c. c is both the addend input and the result.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc_reg  = $c$$XMMRegister;
    const XMMRegister mul1_reg = $a$$XMMRegister;
    const XMMRegister mul2_reg = $b$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ vfmaf(acc_reg, mul1_reg, mul2_reg, acc_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
23071
// Packed float fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only matched when input 1 is wider than 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc_reg  = $c$$XMMRegister;
    const XMMRegister mul1_reg = $a$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ vfmaf(acc_reg, mul1_reg, $b$$Address, acc_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
23084
// Packed double fused multiply-add with all operands in registers:
// c = a * b + c. c is both the addend input and the result.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc_reg  = $c$$XMMRegister;
    const XMMRegister mul1_reg = $a$$XMMRegister;
    const XMMRegister mul2_reg = $b$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ vfmad(acc_reg, mul1_reg, mul2_reg, acc_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
23096
// Packed double fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only matched when input 1 is wider than 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc_reg  = $c$$XMMRegister;
    const XMMRegister mul1_reg = $a$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ vfmad(acc_reg, mul1_reg, $b$$Address, acc_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
23109
23110 // --------------------------------- Vector Multiply Add --------------------------------------
23111
// MulAddVS2VI (packed short multiply-add producing ints) when AVX is disabled.
// Two-operand SSE form: dst is both the first input and the result.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    const XMMRegister acc_reg   = $dst$$XMMRegister;
    const XMMRegister other_reg = $src1$$XMMRegister;
    __ pmaddwd(acc_reg, other_reg);
  %}
  ins_pipe( pipe_slow );
%}
23121
// MulAddVS2VI (packed short multiply-add producing ints) when AVX is enabled.
// Three-operand form with a separate destination register.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister lhs_reg = $src1$$XMMRegister;
    const XMMRegister rhs_reg = $src2$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ vpmaddwd(dst_reg, lhs_reg, rhs_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
23132
23133 // --------------------------------- Vector Multiply Add Add ----------------------------------
23134
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into a single evpdpwssd when
// AVX512_VNNI is available. dst is both the accumulator input and the result.
// The low ins_cost favours this rule over the unfused alternatives.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const XMMRegister acc_reg = $dst$$XMMRegister;
    const XMMRegister lhs_reg = $src1$$XMMRegister;
    const XMMRegister rhs_reg = $src2$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ evpdpwssd(acc_reg, lhs_reg, rhs_reg, enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
23147
23148 // --------------------------------- PopCount --------------------------------------
23149
// Unmasked PopCountVI / PopCountVL, selected when is_vector_popcount_predicate
// accepts the element type of the input vector. The encoding and element type
// are both taken from the source operand. k0 is passed as the mask operand
// (presumably meaning "no masking" -- confirm against
// vector_popcount_integral_evex).
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    // The ideal opcode is not needed here: the helper dispatches on the element type.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23163
// Masked PopCountVI / PopCountVL, selected when is_vector_popcount_predicate
// accepts the element type of the input vector. src is first copied into dst,
// then the population count is applied under the opmask with the merge flag set.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    const XMMRegister dst_reg  = $dst$$XMMRegister;
    const XMMRegister src_reg  = $src$$XMMRegister;
    const KRegister   mask_reg = $mask$$KRegister;
    const int src_enc       = vector_length_encoding(this, $src);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    // Seed dst with src before the masked popcount is applied.
    __ evmovdquq(dst_reg, src_reg, src_enc);
    __ vector_popcount_integral_evex(elem_bt, dst_reg, src_reg, mask_reg, true, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
23177
// PopCountVI / PopCountVL fallback, selected when is_vector_popcount_predicate
// rejects the element type of the input vector. The work is delegated to
// vector_popcount_integral, which is given two vector temps and one
// general-purpose temp. The encoding and element type come from the source
// operand.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    // The ideal opcode is not needed here: the helper dispatches on the element type.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23193
23194 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23195
// CountTrailingZerosV selected by is_clz_non_subword_predicate_evex on the
// input's element type and size in bytes. Only one vector temp and one
// general-purpose temp are needed; the other temp slots of the helper are
// passed as xnoreg and the mask slot as k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    const XMMRegister dst_reg   = $dst$$XMMRegister;
    const XMMRegister src_reg   = $src$$XMMRegister;
    const XMMRegister vtemp_reg = $xtmp$$XMMRegister;
    const Register    gp_temp   = $rtmp$$Register;
    const int src_enc       = vector_length_encoding(this, $src);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(elem_bt, dst_reg, src_reg, xnoreg,
                                        xnoreg, xnoreg, vtemp_reg, k0, gp_temp, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
23211
// CountTrailingZerosV for short elements when AVX512CD is available and either
// AVX512VL is supported or the result vector is 64 bytes. Three vector temps
// and one general-purpose temp are used; the unused vector temp slot of the
// helper is passed as xnoreg and the mask slot as k0.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister vtemp_a = $xtmp1$$XMMRegister;
    const XMMRegister vtemp_b = $xtmp2$$XMMRegister;
    const XMMRegister vtemp_c = $xtmp3$$XMMRegister;
    const Register    gp_temp = $rtmp$$Register;
    const int src_enc       = vector_length_encoding(this, $src);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(elem_bt, dst_reg, src_reg, vtemp_a,
                                        vtemp_b, xnoreg, vtemp_c, k0, gp_temp, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
23228
// CountTrailingZerosV for byte elements when AVX512VL+BW are available.
// Four vector temps, one opmask temp and one general-purpose temp are handed
// to the vector_count_trailing_zeros_evex helper.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister vtemp_a = $xtmp1$$XMMRegister;
    const XMMRegister vtemp_b = $xtmp2$$XMMRegister;
    const XMMRegister vtemp_c = $xtmp3$$XMMRegister;
    const XMMRegister vtemp_d = $xtmp4$$XMMRegister;
    const KRegister   k_temp  = $ktmp$$KRegister;
    const Register    gp_temp = $rtmp$$Register;
    const int src_enc       = vector_length_encoding(this, $src);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(elem_bt, dst_reg, src_reg, vtemp_a,
                                        vtemp_b, vtemp_c, vtemp_d,
                                        k_temp, gp_temp, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
23244
// CountTrailingZerosV fallback for input vectors smaller than 64 bytes when
// AVX512VL is not available. The work is delegated to
// vector_count_trailing_zeros_avx with three vector temps and one
// general-purpose temp.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister vtemp_a = $xtmp1$$XMMRegister;
    const XMMRegister vtemp_b = $xtmp2$$XMMRegister;
    const XMMRegister vtemp_c = $xtmp3$$XMMRegister;
    const Register    gp_temp = $rtmp$$Register;
    const int src_enc       = vector_length_encoding(this, $src);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(elem_bt, dst_reg, src_reg, vtemp_a,
                                       vtemp_b, vtemp_c, gp_temp, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
23258
23259
23260 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23261
23262 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23263 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23264 effect(TEMP dst);
23265 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23266 ins_encode %{
23267 int vector_len = vector_length_encoding(this);
23268 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23269 %}
23270 ins_pipe( pipe_slow );
23271 %}
23272
23273 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23274 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23275 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23276 effect(TEMP dst);
23277 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23278 ins_encode %{
23279 int vector_len = vector_length_encoding(this);
23280 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23281 %}
23282 ins_pipe( pipe_slow );
23283 %}
23284
23285 // --------------------------------- Rotation Operations ----------------------------------
23286 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23287 match(Set dst (RotateLeftV src shift));
23288 match(Set dst (RotateRightV src shift));
23289 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23290 ins_encode %{
23291 int opcode = this->ideal_Opcode();
23292 int vector_len = vector_length_encoding(this);
23293 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23294 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23295 %}
23296 ins_pipe( pipe_slow );
23297 %}
23298
23299 instruct vprorate(vec dst, vec src, vec shift) %{
23300 match(Set dst (RotateLeftV src shift));
23301 match(Set dst (RotateRightV src shift));
23302 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23303 ins_encode %{
23304 int opcode = this->ideal_Opcode();
23305 int vector_len = vector_length_encoding(this);
23306 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23307 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23308 %}
23309 ins_pipe( pipe_slow );
23310 %}
23311
23312 // ---------------------------------- Masked Operations ------------------------------------
23313 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23314 predicate(!n->in(3)->bottom_type()->isa_pvectmask());
23315 match(Set dst (LoadVectorMasked mem mask));
23316 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23317 ins_encode %{
23318 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23319 int vlen_enc = vector_length_encoding(this);
23320 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23321 %}
23322 ins_pipe( pipe_slow );
23323 %}
23324
23325
23326 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23327 predicate(n->in(3)->bottom_type()->isa_pvectmask());
23328 match(Set dst (LoadVectorMasked mem mask));
23329 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23330 ins_encode %{
23331 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23332 int vector_len = vector_length_encoding(this);
23333 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23334 %}
23335 ins_pipe( pipe_slow );
23336 %}
23337
23338 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23339 predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23340 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23341 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23342 ins_encode %{
23343 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23344 int vlen_enc = vector_length_encoding(src_node);
23345 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23346 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23347 %}
23348 ins_pipe( pipe_slow );
23349 %}
23350
23351 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23352 predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23353 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23354 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23355 ins_encode %{
23356 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23357 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23358 int vlen_enc = vector_length_encoding(src_node);
23359 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23360 %}
23361 ins_pipe( pipe_slow );
23362 %}
23363
23364 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23365 match(Set addr (VerifyVectorAlignment addr mask));
23366 effect(KILL cr);
23367 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23368 ins_encode %{
23369 Label Lskip;
23370 // check if masked bits of addr are zero
23371 __ testq($addr$$Register, $mask$$constant);
23372 __ jccb(Assembler::equal, Lskip);
23373 __ stop("verify_vector_alignment found a misaligned vector memory access");
23374 __ bind(Lskip);
23375 %}
23376 ins_pipe(pipe_slow);
23377 %}
23378
23379 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23380 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23381 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23382 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23383 ins_encode %{
23384 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23385 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23386
23387 Label DONE;
23388 int vlen_enc = vector_length_encoding(this, $src1);
23389 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23390
23391 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23392 __ mov64($dst$$Register, -1L);
23393 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23394 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23395 __ jccb(Assembler::carrySet, DONE);
23396 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23397 __ notq($dst$$Register);
23398 __ tzcntq($dst$$Register, $dst$$Register);
23399 __ bind(DONE);
23400 %}
23401 ins_pipe( pipe_slow );
23402 %}
23403
23404
23405 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23406 match(Set dst (VectorMaskGen len));
23407 effect(TEMP temp, KILL cr);
23408 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23409 ins_encode %{
23410 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23411 %}
23412 ins_pipe( pipe_slow );
23413 %}
23414
23415 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23416 match(Set dst (VectorMaskGen len));
23417 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23418 effect(TEMP temp);
23419 ins_encode %{
23420 if ($len$$constant > 0) {
23421 __ mov64($temp$$Register, right_n_bits($len$$constant));
23422 __ kmovql($dst$$KRegister, $temp$$Register);
23423 } else {
23424 __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23425 }
23426 %}
23427 ins_pipe( pipe_slow );
23428 %}
23429
23430 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23431 predicate(n->in(1)->bottom_type()->isa_pvectmask());
23432 match(Set dst (VectorMaskToLong mask));
23433 effect(TEMP dst, KILL cr);
23434 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23435 ins_encode %{
23436 int opcode = this->ideal_Opcode();
23437 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23438 int mask_len = Matcher::vector_length(this, $mask);
23439 int mask_size = mask_len * type2aelembytes(mbt);
23440 int vlen_enc = vector_length_encoding(this, $mask);
23441 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23442 $dst$$Register, mask_len, mask_size, vlen_enc);
23443 %}
23444 ins_pipe( pipe_slow );
23445 %}
23446
23447 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23448 predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23449 match(Set dst (VectorMaskToLong mask));
23450 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23451 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23452 ins_encode %{
23453 int opcode = this->ideal_Opcode();
23454 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23455 int mask_len = Matcher::vector_length(this, $mask);
23456 int vlen_enc = vector_length_encoding(this, $mask);
23457 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23458 $dst$$Register, mask_len, mbt, vlen_enc);
23459 %}
23460 ins_pipe( pipe_slow );
23461 %}
23462
23463 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23464 predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23465 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23466 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23467 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23468 ins_encode %{
23469 int opcode = this->ideal_Opcode();
23470 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23471 int mask_len = Matcher::vector_length(this, $mask);
23472 int vlen_enc = vector_length_encoding(this, $mask);
23473 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23474 $dst$$Register, mask_len, mbt, vlen_enc);
23475 %}
23476 ins_pipe( pipe_slow );
23477 %}
23478
23479 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23480 predicate(n->in(1)->bottom_type()->isa_pvectmask());
23481 match(Set dst (VectorMaskTrueCount mask));
23482 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23483 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23484 ins_encode %{
23485 int opcode = this->ideal_Opcode();
23486 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23487 int mask_len = Matcher::vector_length(this, $mask);
23488 int mask_size = mask_len * type2aelembytes(mbt);
23489 int vlen_enc = vector_length_encoding(this, $mask);
23490 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23491 $tmp$$Register, mask_len, mask_size, vlen_enc);
23492 %}
23493 ins_pipe( pipe_slow );
23494 %}
23495
23496 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23497 predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23498 match(Set dst (VectorMaskTrueCount mask));
23499 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23500 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23501 ins_encode %{
23502 int opcode = this->ideal_Opcode();
23503 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23504 int mask_len = Matcher::vector_length(this, $mask);
23505 int vlen_enc = vector_length_encoding(this, $mask);
23506 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23507 $tmp$$Register, mask_len, mbt, vlen_enc);
23508 %}
23509 ins_pipe( pipe_slow );
23510 %}
23511
23512 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23513 predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23514 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23515 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23516 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23517 ins_encode %{
23518 int opcode = this->ideal_Opcode();
23519 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23520 int mask_len = Matcher::vector_length(this, $mask);
23521 int vlen_enc = vector_length_encoding(this, $mask);
23522 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23523 $tmp$$Register, mask_len, mbt, vlen_enc);
23524 %}
23525 ins_pipe( pipe_slow );
23526 %}
23527
23528 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23529 predicate(n->in(1)->bottom_type()->isa_pvectmask());
23530 match(Set dst (VectorMaskFirstTrue mask));
23531 match(Set dst (VectorMaskLastTrue mask));
23532 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23533 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23534 ins_encode %{
23535 int opcode = this->ideal_Opcode();
23536 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23537 int mask_len = Matcher::vector_length(this, $mask);
23538 int mask_size = mask_len * type2aelembytes(mbt);
23539 int vlen_enc = vector_length_encoding(this, $mask);
23540 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23541 $tmp$$Register, mask_len, mask_size, vlen_enc);
23542 %}
23543 ins_pipe( pipe_slow );
23544 %}
23545
23546 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23547 predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23548 match(Set dst (VectorMaskFirstTrue mask));
23549 match(Set dst (VectorMaskLastTrue mask));
23550 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23551 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23552 ins_encode %{
23553 int opcode = this->ideal_Opcode();
23554 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23555 int mask_len = Matcher::vector_length(this, $mask);
23556 int vlen_enc = vector_length_encoding(this, $mask);
23557 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23558 $tmp$$Register, mask_len, mbt, vlen_enc);
23559 %}
23560 ins_pipe( pipe_slow );
23561 %}
23562
23563 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23564 predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23565 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23566 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23567 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23568 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23569 ins_encode %{
23570 int opcode = this->ideal_Opcode();
23571 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23572 int mask_len = Matcher::vector_length(this, $mask);
23573 int vlen_enc = vector_length_encoding(this, $mask);
23574 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23575 $tmp$$Register, mask_len, mbt, vlen_enc);
23576 %}
23577 ins_pipe( pipe_slow );
23578 %}
23579
23580 // --------------------------------- Compress/Expand Operations ---------------------------
23581 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23582 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23583 match(Set dst (CompressV src mask));
23584 match(Set dst (ExpandV src mask));
23585 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
23586 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23587 ins_encode %{
23588 int opcode = this->ideal_Opcode();
23589 int vlen_enc = vector_length_encoding(this);
23590 BasicType bt = Matcher::vector_element_basic_type(this);
23591 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23592 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23593 %}
23594 ins_pipe( pipe_slow );
23595 %}
23596
23597 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23598 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23599 match(Set dst (CompressV src mask));
23600 match(Set dst (ExpandV src mask));
23601 format %{ "vector_compress_expand $dst, $src, $mask" %}
23602 ins_encode %{
23603 int opcode = this->ideal_Opcode();
23604 int vector_len = vector_length_encoding(this);
23605 BasicType bt = Matcher::vector_element_basic_type(this);
23606 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23607 %}
23608 ins_pipe( pipe_slow );
23609 %}
23610
23611 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23612 match(Set dst (CompressM mask));
23613 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23614 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23615 ins_encode %{
23616 assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
23617 int mask_len = Matcher::vector_length(this);
23618 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23619 %}
23620 ins_pipe( pipe_slow );
23621 %}
23622
23623 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23624
23625 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23626 predicate(!VM_Version::supports_gfni());
23627 match(Set dst (ReverseV src));
23628 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23629 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23630 ins_encode %{
23631 int vec_enc = vector_length_encoding(this);
23632 BasicType bt = Matcher::vector_element_basic_type(this);
23633 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23634 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23635 %}
23636 ins_pipe( pipe_slow );
23637 %}
23638
23639 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
23640 predicate(VM_Version::supports_gfni());
23641 match(Set dst (ReverseV src));
23642 effect(TEMP dst, TEMP xtmp);
23643 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
23644 ins_encode %{
23645 int vec_enc = vector_length_encoding(this);
23646 BasicType bt = Matcher::vector_element_basic_type(this);
23647 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
23648 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
23649 $xtmp$$XMMRegister);
23650 %}
23651 ins_pipe( pipe_slow );
23652 %}
23653
23654 instruct vreverse_byte_reg(vec dst, vec src) %{
23655 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
23656 match(Set dst (ReverseBytesV src));
23657 effect(TEMP dst);
23658 format %{ "vector_reverse_byte $dst, $src" %}
23659 ins_encode %{
23660 int vec_enc = vector_length_encoding(this);
23661 BasicType bt = Matcher::vector_element_basic_type(this);
23662 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
23663 %}
23664 ins_pipe( pipe_slow );
23665 %}
23666
23667 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23668 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
23669 match(Set dst (ReverseBytesV src));
23670 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23671 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23672 ins_encode %{
23673 int vec_enc = vector_length_encoding(this);
23674 BasicType bt = Matcher::vector_element_basic_type(this);
23675 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23676 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23677 %}
23678 ins_pipe( pipe_slow );
23679 %}
23680
23681 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
23682
23683 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
23684 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23685 Matcher::vector_length_in_bytes(n->in(1))));
23686 match(Set dst (CountLeadingZerosV src));
23687 format %{ "vector_count_leading_zeros $dst, $src" %}
23688 ins_encode %{
23689 int vlen_enc = vector_length_encoding(this, $src);
23690 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23691 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23692 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
23693 %}
23694 ins_pipe( pipe_slow );
23695 %}
23696
23697 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
23698 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23699 Matcher::vector_length_in_bytes(n->in(1))));
23700 match(Set dst (CountLeadingZerosV src mask));
23701 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
23702 ins_encode %{
23703 int vlen_enc = vector_length_encoding(this, $src);
23704 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23705 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23706 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
23707 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
23708 %}
23709 ins_pipe( pipe_slow );
23710 %}
23711
23712 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
23713 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23714 VM_Version::supports_avx512cd() &&
23715 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23716 match(Set dst (CountLeadingZerosV src));
23717 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
23718 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
23719 ins_encode %{
23720 int vlen_enc = vector_length_encoding(this, $src);
23721 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23722 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23723 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
23724 %}
23725 ins_pipe( pipe_slow );
23726 %}
23727
23728 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
23729 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23730 match(Set dst (CountLeadingZerosV src));
23731 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23732 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
23733 ins_encode %{
23734 int vlen_enc = vector_length_encoding(this, $src);
23735 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23736 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23737 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
23738 $rtmp$$Register, true, vlen_enc);
23739 %}
23740 ins_pipe( pipe_slow );
23741 %}
23742
23743 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
23744 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
23745 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23746 match(Set dst (CountLeadingZerosV src));
23747 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
23748 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
23749 ins_encode %{
23750 int vlen_enc = vector_length_encoding(this, $src);
23751 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23752 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23753 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
23754 %}
23755 ins_pipe( pipe_slow );
23756 %}
23757
23758 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23759 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
23760 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23761 match(Set dst (CountLeadingZerosV src));
23762 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23763 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23764 ins_encode %{
23765 int vlen_enc = vector_length_encoding(this, $src);
23766 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23767 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23768 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23769 %}
23770 ins_pipe( pipe_slow );
23771 %}
23772
23773 // ---------------------------------- Vector Masked Operations ------------------------------------
23774
23775 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
23776 match(Set dst (AddVB (Binary dst src2) mask));
23777 match(Set dst (AddVS (Binary dst src2) mask));
23778 match(Set dst (AddVI (Binary dst src2) mask));
23779 match(Set dst (AddVL (Binary dst src2) mask));
23780 match(Set dst (AddVF (Binary dst src2) mask));
23781 match(Set dst (AddVD (Binary dst src2) mask));
23782 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
23783 ins_encode %{
23784 int vlen_enc = vector_length_encoding(this);
23785 BasicType bt = Matcher::vector_element_basic_type(this);
23786 int opc = this->ideal_Opcode();
23787 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23788 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23789 %}
23790 ins_pipe( pipe_slow );
23791 %}
23792
23793 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
23794 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
23795 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
23796 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
23797 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
23798 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
23799 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
23800 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
23801 ins_encode %{
23802 int vlen_enc = vector_length_encoding(this);
23803 BasicType bt = Matcher::vector_element_basic_type(this);
23804 int opc = this->ideal_Opcode();
23805 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23806 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
23807 %}
23808 ins_pipe( pipe_slow );
23809 %}
23810
23811 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
23812 match(Set dst (XorV (Binary dst src2) mask));
23813 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
23814 ins_encode %{
23815 int vlen_enc = vector_length_encoding(this);
23816 BasicType bt = Matcher::vector_element_basic_type(this);
23817 int opc = this->ideal_Opcode();
23818 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23819 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23820 %}
23821 ins_pipe( pipe_slow );
23822 %}
23823
23824 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
23825 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
23826 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
23827 ins_encode %{
23828 int vlen_enc = vector_length_encoding(this);
23829 BasicType bt = Matcher::vector_element_basic_type(this);
23830 int opc = this->ideal_Opcode();
23831 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23832 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
23833 %}
23834 ins_pipe( pipe_slow );
23835 %}
23836
23837 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
23838 match(Set dst (OrV (Binary dst src2) mask));
23839 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
23840 ins_encode %{
23841 int vlen_enc = vector_length_encoding(this);
23842 BasicType bt = Matcher::vector_element_basic_type(this);
23843 int opc = this->ideal_Opcode();
23844 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23845 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23846 %}
23847 ins_pipe( pipe_slow );
23848 %}
23849
23850 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
23851 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
23852 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
23853 ins_encode %{
23854 int vlen_enc = vector_length_encoding(this);
23855 BasicType bt = Matcher::vector_element_basic_type(this);
23856 int opc = this->ideal_Opcode();
23857 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23858 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
23859 %}
23860 ins_pipe( pipe_slow );
23861 %}
23862
23863 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
23864 match(Set dst (AndV (Binary dst src2) mask));
23865 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
23866 ins_encode %{
23867 int vlen_enc = vector_length_encoding(this);
23868 BasicType bt = Matcher::vector_element_basic_type(this);
23869 int opc = this->ideal_Opcode();
23870 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23871 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23872 %}
23873 ins_pipe( pipe_slow );
23874 %}
23875
23876 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
23877 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
23878 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
23879 ins_encode %{
23880 int vlen_enc = vector_length_encoding(this);
23881 BasicType bt = Matcher::vector_element_basic_type(this);
23882 int opc = this->ideal_Opcode();
23883 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23884 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
23885 %}
23886 ins_pipe( pipe_slow );
23887 %}
23888
23889 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
23890 match(Set dst (SubVB (Binary dst src2) mask));
23891 match(Set dst (SubVS (Binary dst src2) mask));
23892 match(Set dst (SubVI (Binary dst src2) mask));
23893 match(Set dst (SubVL (Binary dst src2) mask));
23894 match(Set dst (SubVF (Binary dst src2) mask));
23895 match(Set dst (SubVD (Binary dst src2) mask));
23896 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
23897 ins_encode %{
23898 int vlen_enc = vector_length_encoding(this);
23899 BasicType bt = Matcher::vector_element_basic_type(this);
23900 int opc = this->ideal_Opcode();
23901 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23902 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23903 %}
23904 ins_pipe( pipe_slow );
23905 %}
23906
23907 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
23908 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
23909 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
23910 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
23911 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
23912 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
23913 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
23914 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
23915 ins_encode %{
23916 int vlen_enc = vector_length_encoding(this);
23917 BasicType bt = Matcher::vector_element_basic_type(this);
23918 int opc = this->ideal_Opcode();
23919 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
23920 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
23921 %}
23922 ins_pipe( pipe_slow );
23923 %}
23924
// Masked vector multiply, register/register form.
// Covers the short, int, long, float and double multiply nodes; dst is both
// the first input and the result.
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23941
// Masked vector multiply, register/memory form.
// The second input is a LoadVector folded in as the memory operand src2.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23958
// Masked vector square root (float and double).
// Unary operation: dst is the only vector operand and is passed to
// evmasked_op for the destination and for both source arguments.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23972
// Masked vector divide (float and double), register/register form.
// dst is both the dividend input and the result; src2 is the divisor input.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23986
// Masked vector divide (float and double), register/memory form.
// The second input is a LoadVector folded in as the memory operand src2.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24000
24001
// Masked vector rotate (left or right) by an 8-bit immediate count.
// The rotate direction is carried by the ideal opcode handed to evmasked_op.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24015
// Masked vector rotate (left or right) with the rotate counts supplied in the
// vector register src2.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24029
// Masked vector left shift (short, int, long) by an 8-bit immediate.
// Matches a shift whose count input is an LShiftCntV of the constant, so the
// constant is passed to evmasked_op directly.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24044
// Masked vector left shift (short, int, long) with the count in src2.
// Selected only when the shift node is not a variable shift; the trailing
// evmasked_op argument is therefore false.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24060
// Masked vector left shift (short, int, long), variable-shift flavour.
// Selected only when the shift node reports is_var_shift(); the trailing
// evmasked_op argument is therefore true.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24076
// Masked vector arithmetic right shift (short, int, long) by an 8-bit
// immediate. Matches a shift whose count input is an RShiftCntV of the
// constant.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24091
// Masked vector arithmetic right shift (short, int, long) with the count in
// src2. Selected only when the shift node is not a variable shift; the
// trailing evmasked_op argument is therefore false.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24107
// Masked vector arithmetic right shift (short, int, long), variable-shift
// flavour. Selected only when the shift node reports is_var_shift(); the
// trailing evmasked_op argument is therefore true.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24123
// Masked vector unsigned (logical) right shift (short, int, long) by an
// 8-bit immediate. Matches a shift whose count input is an RShiftCntV of the
// constant.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24138
// Masked vector unsigned (logical) right shift (short, int, long) with the
// count in src2. Selected only when the shift node is not a variable shift;
// the trailing evmasked_op argument is therefore false.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24154
// Masked vector unsigned (logical) right shift (short, int, long),
// variable-shift flavour. Selected only when the shift node reports
// is_var_shift(); the trailing evmasked_op argument is therefore true.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24170
// Masked vector maximum, register/register form.
// dst is both the first input and the result.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24183
// Masked vector maximum, register/memory form.
// The second input is a LoadVector folded in as the memory operand src2.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24196
// Masked vector minimum, register/register form.
// dst is both the first input and the result.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24209
// Masked vector minimum, register/memory form.
// The second input is a LoadVector folded in as the memory operand src2.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24222
// Masked vector rearrange.
// dst is both the first input and the result; src2 is the second input.
// Unlike the neighbouring masked arithmetic rules, the boolean argument that
// precedes the vector length encoding is false here.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24235
// Masked vector absolute value for byte, short, int and long elements.
// Unary operation: dst is the only vector operand and is passed to
// evmasked_op for the destination and for both source arguments.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24251
// Masked fused multiply-add (float and double), all-register form.
// dst is the first input and the result; src2 and src3 are the remaining
// inputs. The encoding asserts that UseFMA is set.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24266
// Masked fused multiply-add (float and double) with the third input loaded
// from memory: src3 is a LoadVector folded in as a memory operand.
// The encoding asserts that UseFMA is set.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24281
// Masked vector compare producing an opmask result.
// src1 and src2 are compared under the predicate mask with condition cond;
// the result is written to the mask register dst. The element type and the
// vector length encoding are both taken from src1, not from the result node.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const BasicType cmp_elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const int vec_enc = vector_length_encoding(this, $src1);

    // Pick the compare instruction from the element type of the inputs.
    // Integral types translate the condition into an integer comparison
    // predicate plus a signedness flag; floating-point types translate it
    // into a floating-point comparison predicate.
    switch (cmp_elem_bt) {
      case T_BYTE: {
        Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
        bool unsigned_cmp = Matcher::is_unsigned_booltest_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, !unsigned_cmp, vec_enc);
        break;
      }
      case T_SHORT: {
        Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
        bool unsigned_cmp = Matcher::is_unsigned_booltest_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, !unsigned_cmp, vec_enc);
        break;
      }
      case T_INT: {
        Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
        bool unsigned_cmp = Matcher::is_unsigned_booltest_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, !unsigned_cmp, vec_enc);
        break;
      }
      case T_LONG: {
        Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
        bool unsigned_cmp = Matcher::is_unsigned_booltest_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, !unsigned_cmp, vec_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      default: assert(false, "%s", type2name(cmp_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
24331
// MaskAll from an int register into an opmask register, for masks of at most
// 32 lanes. The lane count of the node is handed to vector_maskall_operation.
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
24342
// Opmask NOT for masks shorter than 8 lanes, matched as an XorVMask of src
// with MaskAll(-1). Requires AVX512DQ. This form of knot takes an extra
// opmask temporary and a general-purpose temporary.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister,
            $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24354
// Opmask NOT, matched as an XorVMask of src with MaskAll(-1), for the lane
// counts that need no temporaries: 8 lanes with AVX512DQ, exactly 16 lanes,
// or more than 16 lanes with AVX512BW.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24367
// VectorLongToMask producing a vector-register mask (the result type is not
// a predicate-register mask) for at most 8 lanes. No vector temporary is
// used: xnoreg is passed in that position.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    // The encoding is derived from the lane count rather than from the node.
    const int vlen_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              xnoreg, lane_count, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24381
24382
// VectorLongToMask producing a vector-register mask (the result type is not
// a predicate-register mask) for more than 8 lanes. Needs one vector
// temporary in addition to the two general-purpose temporaries and kills
// the flags register. The encoding asserts that there are at most 32 lanes.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    assert(lane_count <= 32, "invalid mask length");
    // The encoding is derived from the lane count rather than from the node.
    const int vlen_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              $xtmp1$$XMMRegister, lane_count, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24397
// VectorLongToMask when the result is a predicate-register mask: the long
// value is moved from the general-purpose register into the opmask register
// with a single kmov.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    const KRegister mask_dst = $dst$$KRegister;
    const Register long_src = $src$$Register;
    __ kmov(mask_dst, long_src);
  %}
  ins_pipe( pipe_slow );
%}
24407
// Logical AND / OR / XOR of two opmask registers.
// The ideal opcode picks the operation inside masked_op. Both inputs are
// asserted to have the same mask type. kscratch is reserved as a TEMP by the
// effect list but is not referenced by the encoding below.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* lhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* rhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(lhs_mask->bottom_type(), rhs_mask->bottom_type()), "Mask types must be equal");
    uint lane_count = Matcher::vector_length(this);
    // Without AVX512DQ, masks narrower than 16 lanes are handled as 16-lane
    // masks.
    if (lane_count < 16 && !VM_Version::supports_avx512dq()) {
      lane_count = 16;
    }
    __ masked_op(this->ideal_Opcode(), lane_count,
                 $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24424
// Masked three-input logic operation (MacroLogicV), all-register form.
// dst is the first input and the result, src2 and src3 are the other two
// inputs, and func is the unsigned 8-bit immediate passed to evpternlog.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister,
                  true, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24436
// Masked three-input logic operation (MacroLogicV) with the third input given
// as a memory operand.
// NOTE(review): the other *_mem_masked rules in this section wrap their
// memory operand in LoadVector inside the match rule; this one uses src3
// directly. Confirm that this is intended.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address,
                  true, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24448
// CastVV on an opmask register. Emits no code: the encoding is empty and
// both the size and the cost are zero.
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
24459
// CastVV on a vector register. Emits no code: the encoding is empty and
// both the size and the cost are zero.
instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
24470
// CastVV on a legVec operand. Emits no code: the encoding is empty and
// both the size and the cost are zero.
instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
24481
// IsInfiniteF: classify the scalar float in src with vfpclassss into the
// opmask temporary, then copy that mask into the int result register.
// The flags register is declared killed.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    const KRegister class_mask = $ktmp$$KRegister;
    // 0x18 is the class-selection immediate; per the Intel SDM, bits 3 and 4
    // select positive and negative infinity.
    __ vfpclassss(class_mask, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, class_mask);
  %}
  ins_pipe(pipe_slow);
%}
24493
// IsInfiniteD: classify the scalar double in src with vfpclasssd into the
// opmask temporary, then copy that mask into the int result register.
// The flags register is declared killed.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    const KRegister class_mask = $ktmp$$KRegister;
    // 0x18 is the class-selection immediate; per the Intel SDM, bits 3 and 4
    // select positive and negative infinity.
    __ vfpclasssd(class_mask, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, class_mask);
  %}
  ins_pipe(pipe_slow);
%}
24505
// Signed saturating add/subtract for subword element types, register form.
// Applies when the node is a saturating vector node that is not unsigned;
// false is passed as the flag argument of vector_saturating_op.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(ideal_opc, bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister,
                            false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24521
// Unsigned saturating add/subtract for subword element types, register form.
// Applies when the node is an unsigned saturating vector node; true is
// passed as the flag argument of vector_saturating_op.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(ideal_opc, bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister,
                            true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24537
// Signed saturating add/subtract for non-subword element types, EVEX path.
// Applies to 64-byte vectors, or to any size when AVX512VL is available.
// Uses two vector temporaries and two opmask temporaries.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_evex(ideal_opc, bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24557
// Signed saturating add/subtract for non-subword element types, AVX path.
// Applies to vectors of at most 32 bytes when AVX512VL is not available.
// Uses four vector temporaries.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_avx(ideal_opc, bt, $dst$$XMMRegister,
                                       $src1$$XMMRegister, $src2$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24576
// Unsigned saturating add for non-subword element types, EVEX path.
// Applies to 64-byte vectors, or to any size when AVX512VL is available.
// Uses two vector temporaries and one opmask temporary.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_evex(bt, $dst$$XMMRegister,
                                              $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24593
// Unsigned saturating add for non-subword element types, AVX path.
// Applies to vectors of at most 32 bytes when AVX512VL is not available.
// Uses three vector temporaries.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_avx(bt, $dst$$XMMRegister,
                                             $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                             $xtmp3$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24610
// Unsigned saturating subtract for non-subword element types, EVEX path.
// Applies to 64-byte vectors, or to any size when AVX512VL is available.
// Only an opmask temporary is declared; dst is not marked TEMP here.
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_evex(bt, $dst$$XMMRegister,
                                              $src1$$XMMRegister, $src2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24627
// Unsigned saturating vector subtract for non-subword element types, AVX flavour.
// Selected only when AVX512VL is not supported and the vector is at most
// 32 bytes long. Uses two vector temporaries; dst is also declared TEMP.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // Element type and vector length encoding both come from this node.
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vector_sub_dq_saturating_unsigned_avx(bt, result, lhs, rhs,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24644
// Signed saturating vector add/subtract for subword element types where the
// second input is a vector load that is folded in as a memory operand.
// The ideal opcode selects between add and subtract inside the helper.
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_op = this->ideal_Opcode();
    // The boolean argument is false here and true in the unsigned sibling rule.
    __ vector_saturating_op(ideal_op, bt, $dst$$XMMRegister, $src1$$XMMRegister,
                            $src2$$Address, false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24660
// Unsigned saturating vector add/subtract for subword element types where the
// second input is a vector load that is folded in as a memory operand.
// The ideal opcode selects between add and subtract inside the helper.
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_op = this->ideal_Opcode();
    // The boolean argument is true here and false in the signed sibling rule.
    __ vector_saturating_op(ideal_op, bt, $dst$$XMMRegister, $src1$$XMMRegister,
                            $src2$$Address, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24676
// Masked signed saturating vector add/subtract for subword element types,
// register-register form. dst is both the first input and the result.
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister dst_src1 = $dst$$XMMRegister;
    // First boolean is false for this signed rule; the second is true in every
    // masked saturating rule (its meaning is defined by the helper).
    __ evmasked_saturating_op(this->ideal_Opcode(), bt, $mask$$KRegister, dst_src1, dst_src1,
                              $src$$XMMRegister, false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24691
// Masked unsigned saturating vector add/subtract for subword element types,
// register-register form. dst is both the first input and the result.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister dst_src1 = $dst$$XMMRegister;
    // First boolean is true for this unsigned rule; the second is true in every
    // masked saturating rule (its meaning is defined by the helper).
    __ evmasked_saturating_op(this->ideal_Opcode(), bt, $mask$$KRegister, dst_src1, dst_src1,
                              $src$$XMMRegister, true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24706
// Masked signed saturating vector add/subtract for subword element types,
// with the second input folded in from a vector load as a memory operand.
// dst is both the first input and the result.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister dst_src1 = $dst$$XMMRegister;
    // First boolean is false for this signed rule; the second is true in every
    // masked saturating rule (its meaning is defined by the helper).
    __ evmasked_saturating_op(this->ideal_Opcode(), bt, $mask$$KRegister, dst_src1, dst_src1,
                              $src$$Address, false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24721
// Masked unsigned saturating vector add/subtract for subword element types,
// with the second input folded in from a vector load as a memory operand.
// dst is both the first input and the result.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister dst_src1 = $dst$$XMMRegister;
    // First boolean is true for this unsigned rule; the second is true in every
    // masked saturating rule (its meaning is defined by the helper).
    __ evmasked_saturating_op(this->ideal_Opcode(), bt, $mask$$KRegister, dst_src1, dst_src1,
                              $src$$Address, true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24736
// SelectFromTwoVector: the index vector is both an input and the destination,
// so the selected lanes overwrite the index register.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister index_and_result = $index$$XMMRegister;
    __ select_from_two_vectors_evex(elem_type, index_and_result, $src1$$XMMRegister,
                                    $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24748
// ReinterpretS2HF: move a value from a general purpose register into an XMM
// register using evmovw.
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    XMMRegister half_float = $dst$$XMMRegister;
    Register short_bits = $src$$Register;
    __ evmovw(half_float, short_bits);
  %}
  ins_pipe(pipe_slow);
%}
24758
// ReinterpretHF2S: move a value from an XMM register into a general purpose
// register using evmovw, then narrow the integer result to T_SHORT.
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    Register short_bits = $dst$$Register;
    __ evmovw(short_bits, $src$$XMMRegister);
    // Bring the moved value into T_SHORT form via the narrowing helper.
    __ narrow_subword_type(short_bits, T_SHORT);
  %}
  ins_pipe(pipe_slow);
%}
24769
// Fused ConvF2HF followed by ReinterpretS2HF: both source and result live in
// XMM registers, so a single vcvtps2ph is emitted.
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    // Immediate control byte passed to vcvtps2ph.
    // NOTE(review): 0x04 presumably selects MXCSR-controlled rounding - confirm.
    const int imm8 = 0x04;
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, imm8, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
24779
// Fused ReinterpretHF2S followed by ConvHF2F: both source and result live in
// XMM registers, so a single vcvtph2ps is emitted.
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    XMMRegister float_out = $dst$$XMMRegister;
    XMMRegister half_in = $src$$XMMRegister;
    __ vcvtph2ps(float_out, half_in, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
24789
// Scalar half-precision square root (SqrtHF), emitted as vsqrtsh.
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    XMMRegister root = $dst$$XMMRegister;
    XMMRegister operand = $src$$XMMRegister;
    __ vsqrtsh(root, operand);
  %}
  ins_pipe(pipe_slow);
%}
24799
// Scalar half-precision add, divide, multiply and subtract share one rule;
// the ideal opcode tells the efp16sh helper which operation to emit.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ efp16sh(this->ideal_Opcode(), result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
24813
// Scalar half-precision min/max for targets that support AVX10.2.
// No temporaries are needed; k0 is passed as the opmask argument.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    // The ideal opcode distinguishes MinHF from MaxHF.
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    __ sminmax_fp16_avx10_2(ideal_op, result, $src1$$XMMRegister, $src2$$XMMRegister, k0);
  %}
  ins_pipe( pipe_slow );
%}
24827
// Scalar half-precision min/max for targets without AVX10.2.
// Needs one opmask and two XMM temporaries; dst is declared TEMP_DEF.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // The ideal opcode distinguishes MinHF from MaxHF.
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ sminmax_fp16(ideal_op, result, lhs, rhs, $ktmp$$KRegister,
                    $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24843
// Scalar half-precision fused multiply-add. dst is one of the multiplicands
// and also receives the result: dst = dst * src1 + src2.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    XMMRegister multiplier = $src1$$XMMRegister;
    // Argument order follows the emitter: destination, addend, then multiplier.
    __ vfmadd132sh(acc, addend, multiplier);
  %}
  ins_pipe( pipe_slow );
%}
24854
24855
// Packed half-precision square root (SqrtVHF), register source.
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister roots = $dst$$XMMRegister;
    XMMRegister operands = $src$$XMMRegister;
    __ evsqrtph(roots, operands, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24866
// Packed half-precision square root (SqrtVHF) whose input is a reinterpreted
// vector load; the load is folded in as a memory operand.
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister roots = $dst$$XMMRegister;
    __ evsqrtph(roots, $src$$Address, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24877
// Packed half-precision add, divide, multiply and subtract share one rule;
// the ideal opcode tells the evfp16ph helper which operation to emit.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evfp16ph(ideal_op, result, lhs, rhs, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24892
24893
// Packed half-precision add, divide, multiply and subtract where the second
// input is a reinterpreted vector load folded in as a memory operand.
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ evfp16ph(ideal_op, result, lhs, $src2$$Address, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24908
// Packed half-precision fused multiply-add, register operands. dst is one of
// the multiplicands and also receives the result: dst = dst * src1 + src2.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    XMMRegister multiplier = $src1$$XMMRegister;
    // Argument order follows the emitter: destination, addend, then multiplier.
    __ evfmadd132ph(acc, addend, multiplier, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24919
// Packed half-precision fused multiply-add where the multiplier is a
// reinterpreted vector load folded in as a memory operand.
// dst = dst * [src1] + src2.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    // Argument order follows the emitter: destination, addend, then multiplier.
    __ evfmadd132ph(acc, addend, $src1$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24930
// Packed half-precision min/max for targets that support AVX10.2, with the
// second input folded in from a reinterpreted vector load.
// k0 is passed as the opmask argument.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    // The ideal opcode distinguishes MinVHF from MaxVHF.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vminmax_fp16_avx10_2(ideal_op, result, lhs, $src2$$Address, k0, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24945
// Packed half-precision min/max for targets that support AVX10.2, register
// operands. k0 is passed as the opmask argument.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    // The ideal opcode distinguishes MinVHF from MaxVHF.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vminmax_fp16_avx10_2(ideal_op, result, lhs, rhs, k0, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24960
// Packed half-precision min/max for targets without AVX10.2.
// Needs one opmask and two vector temporaries; dst is declared TEMP_DEF.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // The ideal opcode distinguishes MinVHF from MaxVHF.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    // Note the operand order handed to the helper: src2 first, then src1.
    XMMRegister first_arg = $src2$$XMMRegister;
    XMMRegister second_arg = $src1$$XMMRegister;
    __ vminmax_fp16(ideal_op, result, first_arg, second_arg, $ktmp$$KRegister,
                    $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24976
24977 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
24980 //
24981 // peeppredicate ( rule_predicate );
// // the predicate that must hold for the peephole rule to apply; otherwise the rule is ignored
24983 //
24984 // peepmatch ( root_instr_name [preceding_instruction]* );
24985 //
24986 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization. The procedure should
// // reside in the architecture dependent peephole file. The method has the
// // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, a function that, when invoked, returns a new
// // node of the kind defined in peepreplace, and the rules of the nodes appearing
// // in the corresponding peepmatch. The function returns true if successful,
// // else it returns false
24995 //
// peepconstraint ( instruction_number.operand_name relational_op instruction_number.operand_name
//                  [, ...] );
24999 // // instruction numbers are zero-based using left to right order in peepmatch
25000 //
25001 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25002 // // provide an instruction_number.operand_name for each operand that appears
25003 // // in the replacement instruction's match rule
25004 //
25005 // ---------VM FLAGS---------------------------------------------------------
25006 //
25007 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25008 //
25009 // Each peephole rule is given an identifying number starting with zero and
25010 // increasing by one in the order seen by the parser. An individual peephole
25011 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25012 // on the command-line.
25013 //
25014 // ---------CURRENT LIMITATIONS----------------------------------------------
25015 //
25016 // Only transformations inside a basic block (do we need more for peephole)
25017 //
25018 // ---------EXAMPLE----------------------------------------------------------
25019 //
25020 // // pertinent parts of existing instructions in architecture description
25021 // instruct movI(rRegI dst, rRegI src)
25022 // %{
25023 // match(Set dst (CopyI src));
25024 // %}
25025 //
25026 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25027 // %{
25028 // match(Set dst (AddI dst src));
25029 // effect(KILL cr);
25030 // %}
25031 //
25032 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25033 // %{
25034 // match(Set dst (AddI dst src));
25035 // %}
25036 //
25037 // 1. Simple replacement
25038 // - Only match adjacent instructions in same basic block
25039 // - Only equality constraints
25040 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25041 // - Only one replacement instruction
25042 //
25043 // // Change (inc mov) to lea
25044 // peephole %{
25045 // // lea should only be emitted when beneficial
25046 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25047 // // increment preceded by register-register move
25048 // peepmatch ( incI_rReg movI );
25049 // // require that the destination register of the increment
25050 // // match the destination register of the move
25051 // peepconstraint ( 0.dst == 1.dst );
25052 // // construct a replacement instruction that sets
25053 // // the destination to ( move's source register + one )
25054 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25055 // %}
25056 //
25057 // 2. Procedural replacement
// - More flexible finding relevant nodes
25059 // - More flexible constraints
25060 // - More flexible transformations
25061 // - May utilise architecture-dependent API more effectively
25062 // - Currently only one replacement instruction due to adlc parsing capabilities
25063 //
25064 // // Change (inc mov) to lea
25065 // peephole %{
25066 // // lea should only be emitted when beneficial
25067 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25068 // // the rule numbers of these nodes inside are passed into the function below
25069 // peepmatch ( incI_rReg movI );
25070 // // the method that takes the responsibility of transformation
25071 // peepprocedure ( inc_mov_to_lea );
25072 // // the replacement is a leaI_rReg_immI, a lambda upon invoked creating this
25073 // // node is passed into the function above
25074 // peepreplace ( leaI_rReg_immI() );
25075 // %}
25076
// These instructions are not matched by the matcher but are used by the peephole rules
// 32-bit lea computing src1 + src2. The predicate is false, so the matcher
// never selects this rule on its own.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    // rbp and r13 are avoided as the base register; when src1 is one of them
    // the operands are swapped so that src2 becomes the base instead.
    // NOTE(review): presumably because those bases need an explicit
    // displacement in the x86 encoding - confirm.
    const bool swap_base = (lhs == rbp || lhs == r13);
    if (swap_base) {
      assert(rhs != rbp && rhs != r13, "");
      __ leal(result, Address(rhs, lhs, Address::times_1));
    } else {
      __ leal(result, Address(lhs, rhs, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25096
// 32-bit lea computing src1 + immediate. The predicate is false, so the
// matcher never selects this rule on its own.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base = $src1$$Register;
    // The immediate becomes the displacement of the address expression.
    __ leal(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25107
// 32-bit lea computing src << shift, with the shift count used as the address
// scale factor. The predicate is false, so the matcher never selects this
// rule on its own.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register operand = $src$$Register;
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    // A times_2 scale can be expressed as operand + operand, but only when the
    // operand may serve as a base register (rbp and r13 are excluded).
    const bool as_self_add = (factor == Address::times_2) && operand != rbp && operand != r13;
    if (!as_self_add) {
      __ leal(result, Address(noreg, operand, factor));
    } else {
      __ leal(result, Address(operand, operand, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25124
// 64-bit lea computing src1 + src2. The predicate is false, so the matcher
// never selects this rule on its own.
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    // rbp and r13 are avoided as the base register; when src1 is one of them
    // the operands are swapped so that src2 becomes the base instead.
    // NOTE(review): presumably because those bases need an explicit
    // displacement in the x86 encoding - confirm.
    const bool swap_base = (lhs == rbp || lhs == r13);
    if (swap_base) {
      assert(rhs != rbp && rhs != r13, "");
      __ leaq(result, Address(rhs, lhs, Address::times_1));
    } else {
      __ leaq(result, Address(lhs, rhs, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25143
// 64-bit lea computing src1 + 32-bit immediate. The predicate is false, so
// the matcher never selects this rule on its own.
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base = $src1$$Register;
    // The immediate becomes the displacement of the address expression.
    __ leaq(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25154
// 64-bit lea computing src << shift, with the shift count used as the address
// scale factor. The predicate is false, so the matcher never selects this
// rule on its own.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register operand = $src$$Register;
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    // A times_2 scale can be expressed as operand + operand, but only when the
    // operand may serve as a base register (rbp and r13 are excluded).
    const bool as_self_add = (factor == Address::times_2) && operand != rbp && operand != r13;
    if (!as_self_add) {
      __ leaq(result, Address(noreg, operand, factor));
    } else {
      __ leaq(result, Address(operand, operand, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25171
25172 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25173 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25174 // processors with at least partial ALU support for lea
25175 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25176 // beneficial for processors with full ALU support
25177 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25178
// Int register-register add: rooted at addI_rReg, handled by lea_coalesce_reg,
// replaced by leaI_rReg_rReg_peep. Enabled when 2-operand lea is fast.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

// Int register-immediate add: rooted at addI_rReg_imm, handled by
// lea_coalesce_imm, replaced by leaI_rReg_immI_peep. Enabled when 2-operand
// lea is fast.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// Int increment: rooted at incI_rReg, handled by lea_coalesce_imm, replaced by
// leaI_rReg_immI_peep. Enabled when 3-operand lea is fast or on Intel Cascade Lake.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// Int decrement: rooted at decI_rReg, handled by lea_coalesce_imm, replaced by
// leaI_rReg_immI_peep. Enabled when 3-operand lea is fast or on Intel Cascade Lake.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// Int left shift by a small immediate: rooted at salI_rReg_immI2, handled by
// lea_coalesce_imm, replaced by leaI_rReg_immI2_peep. Enabled when 2-operand
// lea is fast.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}

// Long register-register add: rooted at addL_rReg, handled by lea_coalesce_reg,
// replaced by leaL_rReg_rReg_peep. Enabled when 2-operand lea is fast.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

// Long register-immediate add: rooted at addL_rReg_imm, handled by
// lea_coalesce_imm, replaced by leaL_rReg_immL32_peep. Enabled when 2-operand
// lea is fast.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// Long increment: rooted at incL_rReg, handled by lea_coalesce_imm, replaced by
// leaL_rReg_immL32_peep. Enabled when 3-operand lea is fast or on Intel Cascade Lake.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// Long decrement: rooted at decL_rReg, handled by lea_coalesce_imm, replaced by
// leaL_rReg_immL32_peep. Enabled when 3-operand lea is fast or on Intel Cascade Lake.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// Long left shift by a small immediate: rooted at salL_rReg_immI2, handled by
// lea_coalesce_imm, replaced by leaL_rReg_immI2_peep. Enabled when 2-operand
// lea is fast.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}
25262
// The next three rules hand leaPCompressedOopOffset, leaP8Narrow and
// leaP32Narrow nodes to the lea_remove_redundant procedure. They carry no
// peeppredicate and no peepreplace, so they are always enabled and name no
// replacement instruction.
// NOTE(review): what exactly is removed is decided by lea_remove_redundant in
// the platform peephole file - see there for details.
peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}
25280
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25283
25284 //int variant
// Int variant: rooted at testI_reg and handled by test_may_remove.
// No peeppredicate and no peepreplace are given.
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// Long variant: rooted at testL_reg and handled by test_may_remove.
// No peeppredicate and no peepreplace are given.
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}
25297
25298
25299 //----------SMARTSPILL RULES---------------------------------------------------
25300 // These must follow all instruction definitions as they use the names
25301 // defined in the instructions definitions.