1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding, VMReg );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
69
// General registers with encodings 0-5: RAX, RCX, RDX, RBX, RSP, RBP.
// Each register is declared as two slots that share one encoding: NAME is
// mapped to the register's VMReg and NAME_H to the following VMReg slot
// (as_VMReg()->next()).
// Argument order: register save type, C convention save type, ideal register
// type, encoding, VMReg.
// Save types declared here: RAX/RCX/RDX are SOC under both conventions; RBX is
// SOC as register save type and SOE under the C convention; RSP is NS under
// both; RBP is NS as register save type and SOE under the C convention.
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
// RSI and RDI (encodings 6 and 7) are the only general registers whose
// definition depends on the platform: when _WIN64 is defined their C
// convention save type is SOE, otherwise it is SOC. The register save type
// (first argument) is SOC in both branches.
// NOTE(review): the "General Registers" comment earlier in this file says RSI
// and RDI are turned on as SOE registers, but no branch below declares SOE as
// the register save type - confirm whether that comment is stale.
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
// R8-R15 (encodings 8-15); per the note above these must be encoded with REX.
// Each register is again a NAME / NAME_H pair sharing one encoding, with
// NAME_H mapped to the following VMReg slot.
// R8-R11 are SOC under both conventions; R12-R15 are SOC as register save
// type and SOE under the C convention.
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
// R16-R31 (encodings 16-31). All are declared SOC under both the register
// save type and the C convention save type, as NAME / NAME_H pairs sharing
// one encoding with NAME_H mapped to the following VMReg slot.
// NOTE(review): the REX note above only names R8-R15; the prefix required to
// encode R16-R31 is not stated in this section - presumably these are the
// extended (APX) general registers; confirm against the assembler.
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
// chunk0 lists all 32 general registers defined above, in register
// allocation priority order (highest priority first, see the heuristic
// described above). Every register is immediately followed by its _H slot,
// so each NAME/NAME_H pair starts on an even position in the list.
// The order is: R10, R11, R8, R9, R12, then RCX, RBX, RDI, RDX, RSI, RAX,
// RBP, then R13-R15, then R16-R31 in numeric order, with RSP last.
// Do not reorder entries casually: the position in this list is the
// selection priority.
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No register preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
// XMM0-XMM7 (encodings 0-7).
// Each XMM register is declared as 16 reg_def slots: the base name XMMn is
// mapped to the register's VMReg, and the suffixed names XMMnb..XMMnp are
// mapped to the following slots via as_VMReg()->next(1) .. next(15).
// All 16 slots of a register share the register number as their encoding,
// use Op_RegF as ideal register type, and are SOC under both the register
// save type and the C convention save type.
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
// XMM8-XMM15 (encodings 8-15).
// Same layout as the lower XMM registers: 16 reg_def slots per register
// (base name plus suffixes b..p mapped to as_VMReg()->next(1) .. next(15)),
// all sharing the register number as encoding, all Op_RegF, all SOC/SOC.
// NOTE(review): the XMM header comment in this file says the Windows ABI
// preserves XMM6-XMM15 across calls, yet the C convention save type declared
// here is SOC with no _WIN64 variant - presumably callee-saved handling
// happens elsewhere; confirm before relying on these save types.
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
// XMM20: sixteen Op_RegF slots (XMM20 plus suffixes b..p), all with encoding 20.
// Slot k is backed by xmm20->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM20 (SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
// XMM21: sixteen Op_RegF slots (XMM21 plus suffixes b..p), all with encoding 21.
// Slot k is backed by xmm21->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM21 (SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
// XMM22: sixteen Op_RegF slots (XMM22 plus suffixes b..p), all with encoding 22.
// Slot k is backed by xmm22->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM22 (SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
// XMM23: sixteen Op_RegF slots (XMM23 plus suffixes b..p), all with encoding 23.
// Slot k is backed by xmm23->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM23 (SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
// XMM24: sixteen Op_RegF slots (XMM24 plus suffixes b..p), all with encoding 24.
// Slot k is backed by xmm24->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM24 (SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
// XMM25: sixteen Op_RegF slots (XMM25 plus suffixes b..p), all with encoding 25.
// Slot k is backed by xmm25->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM25 (SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
// XMM26: sixteen Op_RegF slots (XMM26 plus suffixes b..p), all with encoding 26.
// Slot k is backed by xmm26->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM26 (SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
// XMM27: sixteen Op_RegF slots (XMM27 plus suffixes b..p), all with encoding 27.
// Slot k is backed by xmm27->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM27 (SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
// XMM28: sixteen Op_RegF slots (XMM28 plus suffixes b..p), all with encoding 28.
// Slot k is backed by xmm28->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM28 (SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
// XMM29: sixteen Op_RegF slots (XMM29 plus suffixes b..p), all with encoding 29.
// Slot k is backed by xmm29->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM29 (SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
// XMM30: sixteen Op_RegF slots (XMM30 plus suffixes b..p), all with encoding 30.
// Slot k is backed by xmm30->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM30 (SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
// XMM31: sixteen Op_RegF slots (XMM31 plus suffixes b..p), all with encoding 31.
// Slot k is backed by xmm31->as_VMReg()->next(k). Both save-type columns
// (register save type, C convention save type) are SOC.
reg_def XMM31 (SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
// Flags register: ideal register type 0, encoding 16, and VMRegImpl::Bad()
// in the VMReg position.
reg_def RFLAGS (SOC, SOC, 0, 16, VMRegImpl::Bad());
782
// AVX3 mask registers K1..K7.
// Each one is defined as two Op_RegI slots: Kn on kn->as_VMReg() and
// Kn_H on the following VMReg slot (next()). The encoding equals n.
reg_def K1  (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H(SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2  (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H(SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3  (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H(SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4  (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H(SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5  (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H(SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6  (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H(SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7  (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H(SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
// Register class with no members.
reg_class no_reg( );
815
// All pointer/long registers, APX extended GPRs included.
// Every register appears together with its _H companion slot.
reg_class all_reg(RAX, RAX_H, RDX, RDX_H,
                  RBP, RBP_H, RDI, RDI_H,
                  RSI, RSI_H, RCX, RCX_H,
                  RBX, RBX_H, RSP, RSP_H,
                  R8, R8_H, R9, R9_H,
                  R10, R10_H, R11, R11_H,
                  R12, R12_H, R13, R13_H,
                  R14, R14_H, R15, R15_H,
                  R16, R16_H, R17, R17_H,
                  R18, R18_H, R19, R19_H,
                  R20, R20_H, R21, R21_H,
                  R22, R22_H, R23, R23_H,
                  R24, R24_H, R25, R25_H,
                  R26, R26_H, R27, R27_H,
                  R28, R28_H, R29, R29_H,
                  R30, R30_H, R31, R31_H);
849
// Class for all int registers including APX extended GPRs.
// Only the low slot of each register is listed (no _H companions).
// Note: RSP and R15, which are members of all_reg, are not listed here.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);
881
// All pointer registers. Membership is not listed here; the C++ body
// hands back _ANY_REG_mask.
reg_class any_reg %{
  return _ANY_REG_mask;
%}
886
// All pointer registers except RSP. Membership is not listed here; the
// C++ body hands back _PTR_REG_mask.
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}
891
// All pointer registers except RSP and RBP. Membership is not listed
// here; the C++ body hands back _PTR_REG_NO_RBP_mask.
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}
896
// All pointer registers except RAX and RSP. Membership is not listed
// here; the C++ body hands back _PTR_NO_RAX_REG_mask.
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}
901
// All pointer registers except RAX, RBX and RSP. Membership is not listed
// here; the C++ body hands back _PTR_NO_RAX_RBX_REG_mask.
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}
906
// All long registers except RSP. Membership is not listed here; the
// C++ body hands back _LONG_REG_mask.
reg_class long_reg %{
  return _LONG_REG_mask;
%}
911
// All long registers except RAX, RDX and RSP. Membership is not listed
// here; the C++ body hands back _LONG_NO_RAX_RDX_REG_mask.
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}
916
// All long registers except RCX and RSP. Membership is not listed here;
// the C++ body hands back _LONG_NO_RCX_REG_mask.
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}
921
// All long registers except RBP and R13. Membership is not listed here;
// the C++ body hands back _LONG_NO_RBP_R13_REG_mask.
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}
926
// All int registers except RSP. Membership is not listed here; the
// C++ body hands back _INT_REG_mask.
reg_class int_reg %{
  return _INT_REG_mask;
%}
931
// All int registers except RAX, RDX and RSP. Membership is not listed
// here; the C++ body hands back _INT_NO_RAX_RDX_REG_mask.
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}
936
// All int registers except RCX and RSP. Membership is not listed here;
// the C++ body hands back _INT_NO_RCX_REG_mask.
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}
941
// All int registers except RBP and R13. Membership is not listed here;
// the C++ body hands back _INT_NO_RBP_R13_REG_mask.
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
946
// Singleton pointer classes: each holds exactly one register together
// with its _H companion slot.
reg_class ptr_rax_reg(RAX, RAX_H);  // RAX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);  // RBX pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);  // RSI pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);  // RBP pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);  // RDI pointer register
reg_class ptr_rsp_reg(RSP, RSP_H);  // stack pointer
reg_class ptr_r15_reg(R15, R15_H);  // TLS pointer
967
// Singleton long classes: each holds exactly one register together
// with its _H companion slot.
reg_class long_rax_reg(RAX, RAX_H);  // RAX long register
reg_class long_rcx_reg(RCX, RCX_H);  // RCX long register
reg_class long_rdx_reg(RDX, RDX_H);  // RDX long register
reg_class long_r11_reg(R11, R11_H);  // R11 long register
979
// Singleton int classes: each holds exactly one register slot.
reg_class int_rax_reg(RAX);  // RAX int register
reg_class int_rbx_reg(RBX);  // RBX int register
reg_class int_rcx_reg(RCX);  // RCX int register
reg_class int_rdx_reg(RDX);  // RDX int register
reg_class int_rdi_reg(RDI);  // RDI int register
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
// Allocation class holding all sixteen slots of XMM0 through XMM31,
// listed in ascending register order (one register per line).
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                   XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
// Allocation class for the mask registers, listed from K7 down to K1,
// each with its _H slot.
alloc_class chunk2(K7, K7_H, K6, K6_H, K5, K5_H, K4, K4_H,
                   K3, K3_H, K2, K2_H, K1, K1_H);
1038
// Class of all mask registers K1..K7, each with its _H slot.
reg_class vectmask_reg(K1, K1_H, K2, K2_H, K3, K3_H, K4, K4_H,
                       K5, K5_H, K6, K6_H, K7, K7_H);
1046
// Singleton mask-register classes: one class per Kn, holding Kn and Kn_H.
reg_class vectmask_reg_K1( K1, K1_H );
reg_class vectmask_reg_K2( K2, K2_H );
reg_class vectmask_reg_K3( K3, K3_H );
reg_class vectmask_reg_K4( K4, K4_H );
reg_class vectmask_reg_K5( K5, K5_H );
reg_class vectmask_reg_K6( K6, K6_H );
reg_class vectmask_reg_K7( K7, K7_H );
1054
// Allocation class for the flags register; it should remain the last
// alloc_class.
alloc_class chunk3( RFLAGS );
1057
// Condition codes: singleton class containing only RFLAGS.
reg_class int_flags( RFLAGS );
1060
// Pre-EVEX float registers: the first slot of XMM0 through XMM15.
reg_class float_reg_legacy(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                           XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15);
1078
// EVEX float registers: the first slot of XMM0 through XMM31.
reg_class float_reg_evex(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                         XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
                         XMM16, XMM17, XMM18, XMM19, XMM20, XMM21, XMM22, XMM23,
                         XMM24, XMM25, XMM26, XMM27, XMM28, XMM29, XMM30, XMM31);
1112
// Dynamic float classes built from float_reg_evex and float_reg_legacy.
// The %{ %} expression is the switch between the two (presumably the EVEX
// class is the one used when it evaluates to true).
reg_class_dynamic float_reg(float_reg_evex,
                            float_reg_legacy,
                            %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex,
                               float_reg_legacy,
                               %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Pre-EVEX double registers: the first two slots of XMM0 through XMM15.
reg_class double_reg_legacy(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b,
                            XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, XMM7, XMM7b,
                            XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b,
                            XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b);
1133
// EVEX double registers: the first two slots of XMM0 through XMM31.
reg_class double_reg_evex(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b,
                          XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, XMM7, XMM7b,
                          XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b,
                          XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b,
                          XMM16, XMM16b, XMM17, XMM17b, XMM18, XMM18b, XMM19, XMM19b,
                          XMM20, XMM20b, XMM21, XMM21b, XMM22, XMM22b, XMM23, XMM23b,
                          XMM24, XMM24b, XMM25, XMM25b, XMM26, XMM26b, XMM27, XMM27b,
                          XMM28, XMM28b, XMM29, XMM29b, XMM30, XMM30b, XMM31, XMM31b);
1167
// Dynamic double classes built from double_reg_evex and double_reg_legacy.
// The %{ %} expression is the switch between the two (presumably the EVEX
// class is the one used when it evaluates to true).
reg_class_dynamic double_reg(double_reg_evex,
                             double_reg_legacy,
                             %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex,
                                double_reg_legacy,
                                %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Pre-EVEX 32bit vector registers: the first slot of XMM0 through XMM15.
reg_class vectors_reg_legacy(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15);
1188
// EVEX 32bit vector registers: the first slot of XMM0 through XMM31.
reg_class vectors_reg_evex(XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                           XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
                           XMM16, XMM17, XMM18, XMM19, XMM20, XMM21, XMM22, XMM23,
                           XMM24, XMM25, XMM26, XMM27, XMM28, XMM29, XMM30, XMM31);
1222
// Dynamic 32bit vector classes built from vectors_reg_evex and
// vectors_reg_legacy. The %{ %} expression is the switch between the two
// (presumably the EVEX class is the one used when it evaluates to true).
reg_class_dynamic vectors_reg(vectors_reg_evex,
                              vectors_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex,
                                     vectors_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Pre-EVEX 64bit vector registers: the first two slots of XMM0 through XMM15.
reg_class vectord_reg_legacy(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b,
                             XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, XMM7, XMM7b,
                             XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b,
                             XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b);
1243
// EVEX 64bit vector registers: the first two slots of XMM0 through XMM31.
reg_class vectord_reg_evex(XMM0, XMM0b, XMM1, XMM1b, XMM2, XMM2b, XMM3, XMM3b,
                           XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, XMM7, XMM7b,
                           XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b,
                           XMM12, XMM12b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b,
                           XMM16, XMM16b, XMM17, XMM17b, XMM18, XMM18b, XMM19, XMM19b,
                           XMM20, XMM20b, XMM21, XMM21b, XMM22, XMM22b, XMM23, XMM23b,
                           XMM24, XMM24b, XMM25, XMM25b, XMM26, XMM26b, XMM27, XMM27b,
                           XMM28, XMM28b, XMM29, XMM29b, XMM30, XMM30b, XMM31, XMM31b);
1277
// Dynamic 64bit vector classes built from vectord_reg_evex and
// vectord_reg_legacy. The %{ %} expression is the switch between the two
// (presumably the EVEX class is the one used when it evaluates to true).
reg_class_dynamic vectord_reg(vectord_reg_evex,
                              vectord_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex,
                                     vectord_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Pre-EVEX 128bit vector registers: the first four slots of XMM0 through XMM15.
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM7, XMM7b, XMM7c, XMM7d,
                             XMM8, XMM8b, XMM8c, XMM8d, XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM15, XMM15b, XMM15c, XMM15d);
1298
// EVEX 128bit vector registers: the first four slots of XMM0 through XMM31.
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM7, XMM7b, XMM7c, XMM7d,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM31, XMM31b, XMM31c, XMM31d);
1332
// Dynamic 128bit vector classes built from vectorx_reg_evex and
// vectorx_reg_legacy. The %{ %} expression is the switch between the two
// (presumably the EVEX class is the one used when it evaluates to true).
reg_class_dynamic vectorx_reg(vectorx_reg_evex,
                              vectorx_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex,
                                     vectorx_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Pre-EVEX 256bit vector registers: the first eight slots of XMM0 through XMM15.
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
                             XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// EVEX 256bit vector registers: the first eight slots of XMM0 through XMM31.
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
// Runtime-selected 256bit vector register classes: the first (EVEX) class is
// chosen when the predicate holds, otherwise the second (legacy) class.
reg_class_dynamic vectory_reg(
    vectory_reg_evex,
    vectory_reg_legacy,
    %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(
    vectory_reg_evex,
    vectory_reg_legacy,
    %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for all 512bit vector registers (XMM0-XMM31)
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512bit vector registers (XMM0-XMM15 only)
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
// Runtime-selected 512bit vector register classes: the first (full) class is
// chosen when the predicate holds, otherwise the second (restricted) class.
reg_class_dynamic vectorz_reg(
    vectorz_reg_evex,
    vectorz_reg_legacy,
    %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(
    vectorz_reg_evex,
    vectorz_reg_legacy,
    %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
// Class holding only the first four slots of XMM0.
reg_class xmm0_reg(XMM0,
                   XMM0b,
                   XMM0c,
                   XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 // If any floating point comparison instruction is used, unordered case always triggers jump
1703 // for below condition, CF=1 is true when at least one input is NaN
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
// Floating point precision selector: half (hlf), single (flt), double (dbl).
enum FP_PREC { fp_prec_hlf, fp_prec_flt, fp_prec_dbl };
1716
1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1718 XMMRegister p, XMMRegister q) {
1719 if (pt == fp_prec_hlf) {
1720 __ evucomish(p, q);
1721 } else if (pt == fp_prec_flt) {
1722 __ ucomiss(p, q);
1723 } else {
1724 __ ucomisd(p, q);
1725 }
1726 }
1727
1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1729 XMMRegister dst, XMMRegister src, Register scratch) {
1730 if (pt == fp_prec_hlf) {
1731 __ movhlf(dst, src, scratch);
1732 } else if (pt == fp_prec_flt) {
1733 __ movflt(dst, src);
1734 } else {
1735 __ movdbl(dst, src);
1736 }
1737 }
1738
1739 // Math.min() # Math.max()
1740 // -----------------------------
1741 // (v)ucomis[h/s/d] #
1742 // ja -> b # a
1743 // jp -> NaN # NaN
1744 // jb -> a # b
1745 // je -> a | b # a & b
1746 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1747 XMMRegister a, XMMRegister b, Register rt,
1748 bool min, enum FP_PREC pt) {
1749 Label nan, zero, below, above, done;
1750
1751 emit_fp_ucom(masm, pt, a, b);
1752
1753 if (dst->encoding() != (min ? b : a)->encoding()) {
1754 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1755 } else {
1756 __ jccb(Assembler::above, done);
1757 }
1758 __ jccb(Assembler::parity, nan); // PF=1
1759 __ jccb(Assembler::below, below); // CF=1
1760
1761 // equal
1762 // Using bitwise operations is a low cost way to compute the correct result
1763 // for zero and non-zero inputs in this scenario except for NaN, which is
1764 // handled separately. The mantissa and exponent are valid with either
1765 // bitwise operation. For zero inputs, the sign bit is chosen according to
1766 // whether a minimum or maximum value is required.
1767 if (min) {
1768 // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
1769 __ vpor(dst, a, b, Assembler::AVX_128bit);
1770 } else {
1771 // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
1772 __ vpand(dst, a, b, Assembler::AVX_128bit);
1773 }
1774 __ jmp(done);
1775
1776 __ bind(above);
1777 movfp(masm, pt, dst, min ? b : a, rt);
1778 __ jmp(done);
1779
1780 __ bind(nan);
1781 if (pt == fp_prec_hlf) {
1782 __ movl(rt, 0x00007e00); // Float16.NaN
1783 __ evmovw(dst, rt);
1784 } else if (pt == fp_prec_flt) {
1785 __ movl(rt, 0x7fc00000); // Float.NaN
1786 __ movdl(dst, rt);
1787 } else {
1788 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1789 __ movdq(dst, rt);
1790 }
1791 __ jmp(done);
1792
1793 __ bind(below);
1794 movfp(masm, pt, dst, min ? a : b, rt);
1795
1796 __ bind(done);
1797 }
1798
1799 //=============================================================================
1800 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1801
1802 int ConstantTable::calculate_table_base_offset() const {
1803 return 0; // absolute addressing, no offset
1804 }
1805
1806 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1807 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1808 ShouldNotReachHere();
1809 }
1810
1811 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1812 // Empty encoding
1813 }
1814
1815 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1816 return 0;
1817 }
1818
1819 #ifndef PRODUCT
1820 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1821 st->print("# MachConstantBaseNode (empty encoding)");
1822 }
1823 #endif
1824
1825
1826 //=============================================================================
1827 #ifndef PRODUCT
1828 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1829 Compile* C = ra_->C;
1830
1831 int framesize = C->output()->frame_size_in_bytes();
1832 int bangsize = C->output()->bang_size_in_bytes();
1833 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1834 // Remove wordSize for return addr which is already pushed.
1835 framesize -= wordSize;
1836
1837 if (C->output()->need_stack_bang(bangsize)) {
1838 framesize -= wordSize;
1839 st->print("# stack bang (%d bytes)", bangsize);
1840 st->print("\n\t");
1841 st->print("pushq rbp\t# Save rbp");
1842 if (PreserveFramePointer) {
1843 st->print("\n\t");
1844 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1845 }
1846 if (framesize) {
1847 st->print("\n\t");
1848 st->print("subq rsp, #%d\t# Create frame",framesize);
1849 }
1850 } else {
1851 st->print("subq rsp, #%d\t# Create frame",framesize);
1852 st->print("\n\t");
1853 framesize -= wordSize;
1854 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1855 if (PreserveFramePointer) {
1856 st->print("\n\t");
1857 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1858 if (framesize > 0) {
1859 st->print("\n\t");
1860 st->print("addq rbp, #%d", framesize);
1861 }
1862 }
1863 }
1864
1865 if (VerifyStackAtCalls) {
1866 st->print("\n\t");
1867 framesize -= wordSize;
1868 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1869 #ifdef ASSERT
1870 st->print("\n\t");
1871 st->print("# stack alignment check");
1872 #endif
1873 }
1874 if (C->stub_function() != nullptr) {
1875 st->print("\n\t");
1876 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1877 st->print("\n\t");
1878 st->print("je fast_entry\t");
1879 st->print("\n\t");
1880 st->print("call #nmethod_entry_barrier_stub\t");
1881 st->print("\n\tfast_entry:");
1882 }
1883 st->cr();
1884 }
1885 #endif
1886
1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1888 Compile* C = ra_->C;
1889
1890 int framesize = C->output()->frame_size_in_bytes();
1891 int bangsize = C->output()->bang_size_in_bytes();
1892
1893 if (C->clinit_barrier_on_entry()) {
1894 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1895 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1896
1897 Label L_skip_barrier;
1898 Register klass = rscratch1;
1899
1900 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1901 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1902
1903 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1904
1905 __ bind(L_skip_barrier);
1906 }
1907
1908 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1909
1910 C->output()->set_frame_complete(__ offset());
1911
1912 if (C->has_mach_constant_base_node()) {
1913 // NOTE: We set the table base offset here because users might be
1914 // emitted before MachConstantBaseNode.
1915 ConstantTable& constant_table = C->output()->constant_table();
1916 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1917 }
1918 }
1919
1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1921 {
1922 return MachNode::size(ra_); // too many variables; just compute it
1923 // the hard way
1924 }
1925
1926 int MachPrologNode::reloc() const
1927 {
1928 return 0; // a large enough number
1929 }
1930
1931 //=============================================================================
1932 #ifndef PRODUCT
1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1934 {
1935 Compile* C = ra_->C;
1936 if (generate_vzeroupper(C)) {
1937 st->print("vzeroupper");
1938 st->cr(); st->print("\t");
1939 }
1940
1941 int framesize = C->output()->frame_size_in_bytes();
1942 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1943 // Remove word for return adr already pushed
1944 // and RBP
1945 framesize -= 2*wordSize;
1946
1947 if (framesize) {
1948 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1949 st->print("\t");
1950 }
1951
1952 st->print_cr("popq rbp");
1953 if (do_polling() && C->is_method_compilation()) {
1954 st->print("\t");
1955 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1956 "ja #safepoint_stub\t"
1957 "# Safepoint: poll for GC");
1958 }
1959 }
1960 #endif
1961
1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1963 {
1964 Compile* C = ra_->C;
1965
1966 if (generate_vzeroupper(C)) {
1967 // Clear upper bits of YMM registers when current compiled code uses
1968 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1969 __ vzeroupper();
1970 }
1971
1972 int framesize = C->output()->frame_size_in_bytes();
1973 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1974 // Remove word for return adr already pushed
1975 // and RBP
1976 framesize -= 2*wordSize;
1977
1978 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1979
1980 if (framesize) {
1981 __ addq(rsp, framesize);
1982 }
1983
1984 __ popq(rbp);
1985
1986 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1987 __ reserved_stack_check();
1988 }
1989
1990 if (do_polling() && C->is_method_compilation()) {
1991 Label dummy_label;
1992 Label* code_stub = &dummy_label;
1993 if (!C->output()->in_scratch_emit_size()) {
1994 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1995 C->output()->add_stub(stub);
1996 code_stub = &stub->entry();
1997 }
1998 __ relocate(relocInfo::poll_return_type);
1999 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
2000 }
2001 }
2002
2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
2004 {
2005 return MachNode::size(ra_); // too many variables; just compute it
2006 // the hard way
2007 }
2008
2009 int MachEpilogNode::reloc() const
2010 {
2011 return 2; // a large enough number
2012 }
2013
2014 const Pipeline* MachEpilogNode::pipeline() const
2015 {
2016 return MachNode::pipeline_class();
2017 }
2018
2019 //=============================================================================
2020
// Coarse classification of an OptoReg as returned by rc_class(): invalid,
// general purpose register, K register, XMM register, or stack slot.
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_stack };
2028
2029 static enum RC rc_class(OptoReg::Name reg)
2030 {
2031 if( !OptoReg::is_valid(reg) ) return rc_bad;
2032
2033 if (OptoReg::is_stack(reg)) return rc_stack;
2034
2035 VMReg r = OptoReg::as_VMReg(reg);
2036
2037 if (r->is_Register()) return rc_int;
2038
2039 if (r->is_KRegister()) return rc_kreg;
2040
2041 assert(r->is_XMMRegister(), "must be");
2042 return rc_float;
2043 }
2044
2045 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2046 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2047 int src_hi, int dst_hi, uint ireg, outputStream* st);
2048
2049 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2050 int stack_offset, int reg, uint ireg, outputStream* st);
2051
2052 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2053 int dst_offset, uint ireg, outputStream* st) {
2054 if (masm) {
2055 switch (ireg) {
2056 case Op_VecS:
2057 __ movq(Address(rsp, -8), rax);
2058 __ movl(rax, Address(rsp, src_offset));
2059 __ movl(Address(rsp, dst_offset), rax);
2060 __ movq(rax, Address(rsp, -8));
2061 break;
2062 case Op_VecD:
2063 __ pushq(Address(rsp, src_offset));
2064 __ popq (Address(rsp, dst_offset));
2065 break;
2066 case Op_VecX:
2067 __ pushq(Address(rsp, src_offset));
2068 __ popq (Address(rsp, dst_offset));
2069 __ pushq(Address(rsp, src_offset+8));
2070 __ popq (Address(rsp, dst_offset+8));
2071 break;
2072 case Op_VecY:
2073 __ vmovdqu(Address(rsp, -32), xmm0);
2074 __ vmovdqu(xmm0, Address(rsp, src_offset));
2075 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2076 __ vmovdqu(xmm0, Address(rsp, -32));
2077 break;
2078 case Op_VecZ:
2079 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2080 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2081 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2082 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2083 break;
2084 default:
2085 ShouldNotReachHere();
2086 }
2087 #ifndef PRODUCT
2088 } else {
2089 switch (ireg) {
2090 case Op_VecS:
2091 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2092 "movl rax, [rsp + #%d]\n\t"
2093 "movl [rsp + #%d], rax\n\t"
2094 "movq rax, [rsp - #8]",
2095 src_offset, dst_offset);
2096 break;
2097 case Op_VecD:
2098 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset);
2101 break;
2102 case Op_VecX:
2103 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2104 "popq [rsp + #%d]\n\t"
2105 "pushq [rsp + #%d]\n\t"
2106 "popq [rsp + #%d]",
2107 src_offset, dst_offset, src_offset+8, dst_offset+8);
2108 break;
2109 case Op_VecY:
2110 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2111 "vmovdqu xmm0, [rsp + #%d]\n\t"
2112 "vmovdqu [rsp + #%d], xmm0\n\t"
2113 "vmovdqu xmm0, [rsp - #32]",
2114 src_offset, dst_offset);
2115 break;
2116 case Op_VecZ:
2117 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2118 "vmovdqu xmm0, [rsp + #%d]\n\t"
2119 "vmovdqu [rsp + #%d], xmm0\n\t"
2120 "vmovdqu xmm0, [rsp - #64]",
2121 src_offset, dst_offset);
2122 break;
2123 default:
2124 ShouldNotReachHere();
2125 }
2126 #endif
2127 }
2128 }
2129
// Emits (masm != nullptr) or, in non-PRODUCT builds, prints to st
// (masm == nullptr) the move that implements this spill copy.
//
// The source location is the register-allocator assignment of in(1); the
// destination is the assignment of this node. Both are classified with
// rc_class() and the move is picked from the (source class, destination
// class) pair:
//   - Vector values (bottom_type()->isa_vect() is set and isa_pvectmask() is
//     not) are handed to the vec_*_helper routines.
//   - Otherwise a move is treated as 64-bit when, for both source and
//     destination, the first slot is even-numbered and the second slot is the
//     one directly after it; anything else takes the 32-bit form. The kreg
//     branches only contain a 64-bit form.
//
// masm   - assembler to emit into, or nullptr to print instead.
// ra_    - register allocator queried for the assigned registers / offsets.
// do_size - not read anywhere in this function.
// st     - stream written to when masm is nullptr (non-PRODUCT builds only).
//
// Every return statement in this function returns 0.
uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(masm != nullptr || st != nullptr, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  // Register class (stack / gpr / xmm / kreg) of each half.
  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector (non-mask) values: dispatch on the register classes and let the
  // vector helpers pick the instruction from the ideal register size.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (masm) {
          // Memory-to-memory copy through a push/pop pair; no register is
          // named by either instruction.
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq [rsp + #%d]",
                    src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // save rax at [rsp - 8], copy through eax, then reload rax.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl rax, [rsp + #%d]\n\t"
                    "movl [rsp + #%d], rax\n\t"
                    "movq rax, [rsp - #8]",
                    src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          // The printed mnemonic follows UseXmmLoadAndClearUpper.
          st->print("%s %s, [rsp + #%d]\t# spill",
                    UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      // Only the 64-bit form exists; nothing is emitted or printed otherwise.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): Unimplemented() is called unconditionally here, even
      // after the kmov above has been emitted or printed — confirm that
      // gpr -> kreg spill copies are meant to be unsupported.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          // The printed mnemonic follows UseXmmRegToRegMoveAll.
          st->print("%s %s, %s\t# spill",
                    UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s %s, %s\t# spill",
                    UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // xmm -> kreg is rejected.
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    // kreg ->
    if (dst_first_rc == rc_stack) {
      // kreg -> mem
      // Only the 64-bit form exists; nothing is emitted or printed otherwise.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq [rsp + #%d] , %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): Unimplemented() is called unconditionally here, even
      // after the kmov above has been emitted or printed — confirm that
      // kreg -> gpr spill copies are meant to be unsupported.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // kreg -> xmm is rejected.
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No (source class, destination class) pair above returned.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
2566
2567 #ifndef PRODUCT
2568 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2569 implementation(nullptr, ra_, false, st);
2570 }
2571 #endif
2572
2573 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2574 implementation(masm, ra_, false, nullptr);
2575 }
2576
2577 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2578 return MachNode::size(ra_);
2579 }
2580
2581 //=============================================================================
2582 #ifndef PRODUCT
2583 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2584 {
2585 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2586 int reg = ra_->get_reg_first(this);
2587 st->print("leaq %s, [rsp + #%d]\t# box lock",
2588 Matcher::regName[reg], offset);
2589 }
2590 #endif
2591
2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2593 {
2594 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2595 int reg = ra_->get_encode(this);
2596
2597 __ lea(as_Register(reg), Address(rsp, offset));
2598 }
2599
2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2601 {
2602 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2603 if (ra_->get_encode(this) > 15) {
2604 return (offset < 0x80) ? 6 : 9; // REX2
2605 } else {
2606 return (offset < 0x80) ? 5 : 8; // REX
2607 }
2608 }
2609
2610 //=============================================================================
2611 #ifndef PRODUCT
2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2613 {
2614 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2615 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2616 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2617 }
2618 #endif
2619
2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2621 {
2622 __ ic_check(InteriorEntryAlignment);
2623 }
2624
2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2626 {
2627 return MachNode::size(ra_); // too many variables; just compute it
2628 // the hard way
2629 }
2630
2631
2632 //=============================================================================
2633
2634 bool Matcher::supports_vector_calling_convention(void) {
2635 return EnableVectorSupport;
2636 }
2637
2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2639 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2640 }
2641
2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2643 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2644 }
2645
2646 #ifdef ASSERT
2647 static bool is_ndd_demotable(const MachNode* mdef) {
2648 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2649 }
2650 #endif
2651
2652 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2653 int oper_index) {
2654 if (mdef == nullptr) {
2655 return false;
2656 }
2657
2658 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2659 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2660 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2661 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2662 return false;
2663 }
2664
2665 // Complex memory operand covers multiple incoming edges needed for
2666 // address computation. Biasing def towards any address component will not
2667 // result in NDD demotion by assembler.
2668 if (mdef->operand_num_edges(oper_index) != 1) {
2669 return false;
2670 }
2671
2672 // Demotion candidate must be register mask compatible with definition.
2673 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2674 if (!oper_mask.overlap(mdef->out_RegMask())) {
2675 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2676 return false;
2677 }
2678
2679 switch (oper_index) {
2680 // First operand of MachNode corresponding to Intel APX NDD selection
2681 // pattern can share its assigned register with definition operand if
2682 // their live ranges do not overlap. In such a scenario we can demote
2683 // it to legacy map0/map1 instruction by replacing its 4-byte extended
2684 // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
2685 // are decorated with a special flag by instruction selector.
2686 case 1:
2687 return is_ndd_demotable_opr1(mdef);
2688
2689 // Definition operand of commutative operation can be biased towards second
2690 // operand.
2691 case 2:
2692 return is_ndd_demotable_opr2(mdef);
2693
2694 // Current scheme only selects up to two biasing candidates
2695 default:
2696 assert(false, "unhandled operand index: %s", mdef->Name());
2697 break;
2698 }
2699
2700 return false;
2701 }
2702
2703 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2704 assert(EnableVectorSupport, "sanity");
2705 int lo = XMM0_num;
2706 int hi = XMM0b_num;
2707 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2708 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2709 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2710 return OptoRegPair(hi, lo);
2711 }
2712
2713 // Is this branch offset short enough that a short branch can be used?
2714 //
2715 // NOTE: If the platform does not provide any short branch variants, then
2716 // this method should return false for offset 0.
2717 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2718 // The passed offset is relative to address of the branch.
2719 // On 86 a branch displacement is calculated relative to address
2720 // of a next instruction.
2721 offset -= br_size;
2722
2723 // the short version of jmpConUCF2 contains multiple branches,
2724 // making the reach slightly less
2725 if (rule == jmpConUCF2_rule)
2726 return (-126 <= offset && offset <= 125);
2727 return (-128 <= offset && offset <= 127);
2728 }
2729
2730 #ifdef ASSERT
2731 // Return whether or not this register is ever used as an argument.
2732 bool Matcher::can_be_java_arg(int reg)
2733 {
2734 return
2735 reg == RDI_num || reg == RDI_H_num ||
2736 reg == RSI_num || reg == RSI_H_num ||
2737 reg == RDX_num || reg == RDX_H_num ||
2738 reg == RCX_num || reg == RCX_H_num ||
2739 reg == R8_num || reg == R8_H_num ||
2740 reg == R9_num || reg == R9_H_num ||
2741 reg == R12_num || reg == R12_H_num ||
2742 reg == XMM0_num || reg == XMM0b_num ||
2743 reg == XMM1_num || reg == XMM1b_num ||
2744 reg == XMM2_num || reg == XMM2b_num ||
2745 reg == XMM3_num || reg == XMM3b_num ||
2746 reg == XMM4_num || reg == XMM4b_num ||
2747 reg == XMM5_num || reg == XMM5b_num ||
2748 reg == XMM6_num || reg == XMM6b_num ||
2749 reg == XMM7_num || reg == XMM7b_num;
2750 }
2751 #endif
2752
2753 uint Matcher::int_pressure_limit()
2754 {
2755 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2756 }
2757
2758 uint Matcher::float_pressure_limit()
2759 {
2760 // After experiment around with different values, the following default threshold
2761 // works best for LCM's register pressure scheduling on x64.
2762 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2763 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2764 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2765 }
2766
2767 // Register for DIVI projection of divmodI
2768 const RegMask& Matcher::divI_proj_mask() {
2769 return INT_RAX_REG_mask();
2770 }
2771
2772 // Register for MODI projection of divmodI
2773 const RegMask& Matcher::modI_proj_mask() {
2774 return INT_RDX_REG_mask();
2775 }
2776
2777 // Register for DIVL projection of divmodL
2778 const RegMask& Matcher::divL_proj_mask() {
2779 return LONG_RAX_REG_mask();
2780 }
2781
2782 // Register for MODL projection of divmodL
2783 const RegMask& Matcher::modL_proj_mask() {
2784 return LONG_RDX_REG_mask();
2785 }
2786
2787 %}
2788
2789 source_hpp %{
2790 // Header information of the source block.
2791 // Method declarations/definitions which are used outside
2792 // the ad-scope can conveniently be defined here.
2793 //
// To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
2796
2797 #include "runtime/vm_version.hpp"
2798
2799 class NativeJump;
2800
// Call trampoline stub parameters.
// Used for optimization in Compile::shorten_branches.
// This platform has no call trampolines, so both values are zero.
class CallStubImpl {
 public:
  // Size of a call trampoline stub.
  static uint size_call_trampoline() {
    const uint no_trampoline_bytes = 0;  // no call trampolines on this platform
    return no_trampoline_bytes;
  }

  // Number of relocations a call trampoline stub needs.
  static uint reloc_call_trampoline() {
    const uint no_trampoline_relocs = 0;  // no call trampolines on this platform
    return no_trampoline_relocs;
  }
};
2818
2819 class HandlerImpl {
2820
2821 public:
2822
2823 static int emit_deopt_handler(C2_MacroAssembler* masm);
2824
2825 static uint size_deopt_handler() {
2826 // one call and one jmp.
2827 return 7;
2828 }
2829 };
2830
2831 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2832 switch(bytes) {
2833 case 4: // fall-through
2834 case 8: // fall-through
2835 case 16: return Assembler::AVX_128bit;
2836 case 32: return Assembler::AVX_256bit;
2837 case 64: return Assembler::AVX_512bit;
2838
2839 default: {
2840 ShouldNotReachHere();
2841 return Assembler::AVX_NoVec;
2842 }
2843 }
2844 }
2845
2846 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2847 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2848 }
2849
2850 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2851 uint def_idx = use->operand_index(opnd);
2852 Node* def = use->in(def_idx);
2853 return vector_length_encoding(def);
2854 }
2855
2856 static inline bool is_vector_popcount_predicate(BasicType bt) {
2857 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2858 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2859 }
2860
2861 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2862 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2863 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2864 }
2865
// Platform-dependent node flags for x86.
//
// Each flag is Node::_last_flag shifted left by a distinct amount (1..13), so
// the flags occupy consecutive bit positions above the shared node flags.
// _last_flag aliases the highest flag defined here and must be updated when a
// new flag is appended.
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    // Tested by MachNode::compute_padding() to decide whether Intel JCC
    // erratum padding is computed for the node.
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    // Condition-code effect flags; going by their names, they record which
    // EFLAGS bits an instruction sets or clears — confirm against their users.
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    // Tested by is_ndd_demotable_opr1() / is_ndd_demotable_opr2() when
    // choosing register biasing candidates.
    Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
    _last_flag                = Flag_ndd_demotable_opr2
  };
};
2885
2886 %} // end source_hpp
2887
2888 source %{
2889
2890 #include "opto/addnode.hpp"
2891 #include "c2_intelJccErratum_x86.hpp"
2892
2893 void PhaseOutput::pd_perform_mach_node_analysis() {
2894 if (VM_Version::has_intel_jcc_erratum()) {
2895 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2896 _buf_sizes._code += extra_padding;
2897 }
2898 }
2899
2900 int MachNode::pd_alignment_required() const {
2901 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2902 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2903 return IntelJccErratum::largest_jcc_size() + 1;
2904 } else {
2905 return 1;
2906 }
2907 }
2908
2909 int MachNode::compute_padding(int current_offset) const {
2910 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2911 Compile* C = Compile::current();
2912 PhaseOutput* output = C->output();
2913 Block* block = output->block();
2914 int index = output->index();
2915 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2916 } else {
2917 return 0;
2918 }
2919 }
2920
2921 // Emit deopt handler code.
2922 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2923
2924 // Note that the code buffer's insts_mark is always relative to insts.
2925 // That's why we must use the macroassembler to generate a handler.
2926 address base = __ start_a_stub(size_deopt_handler());
2927 if (base == nullptr) {
2928 ciEnv::current()->record_failure("CodeCache is full");
2929 return 0; // CodeBuffer::expand failed
2930 }
2931 int offset = __ offset();
2932
2933 Label start;
2934 __ bind(start);
2935
2936 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2937
2938 int entry_offset = __ offset();
2939
2940 __ jmp(start);
2941
2942 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2943 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2944 "out of bounds read in post-call NOP check");
2945 __ end_a_stub();
2946 return entry_offset;
2947 }
2948
2949 static Assembler::Width widthForType(BasicType bt) {
2950 if (bt == T_BYTE) {
2951 return Assembler::B;
2952 } else if (bt == T_SHORT) {
2953 return Assembler::W;
2954 } else if (bt == T_INT) {
2955 return Assembler::D;
2956 } else {
2957 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2958 return Assembler::Q;
2959 }
2960 }
2961
2962 //=============================================================================
2963
2964 // Float masks come from different places depending on platform.
2965 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2966 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2967 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2968 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2969 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2970 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2971 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2972 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2973 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2974 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2975 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2976 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2977 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2978 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2979 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2980 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2981 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2982 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2983 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2984
2985 //=============================================================================
2986 bool Matcher::match_rule_supported(int opcode) {
2987 if (!has_match_rule(opcode)) {
2988 return false; // no match rule present
2989 }
2990 switch (opcode) {
2991 case Op_AbsVL:
2992 case Op_StoreVectorScatter:
2993 if (UseAVX < 3) {
2994 return false;
2995 }
2996 break;
2997 case Op_PopCountI:
2998 case Op_PopCountL:
2999 if (!UsePopCountInstruction) {
3000 return false;
3001 }
3002 break;
3003 case Op_PopCountVI:
3004 if (UseAVX < 2) {
3005 return false;
3006 }
3007 break;
3008 case Op_CompressV:
3009 case Op_ExpandV:
3010 case Op_PopCountVL:
3011 if (UseAVX < 2) {
3012 return false;
3013 }
3014 break;
3015 case Op_MulVI:
3016 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3017 return false;
3018 }
3019 break;
3020 case Op_MulVL:
3021 if (UseSSE < 4) { // only with SSE4_1 or AVX
3022 return false;
3023 }
3024 break;
3025 case Op_MulReductionVL:
3026 if (VM_Version::supports_avx512dq() == false) {
3027 return false;
3028 }
3029 break;
3030 case Op_AbsVB:
3031 case Op_AbsVS:
3032 case Op_AbsVI:
3033 case Op_AddReductionVI:
3034 case Op_AndReductionV:
3035 case Op_OrReductionV:
3036 case Op_XorReductionV:
3037 if (UseSSE < 3) { // requires at least SSSE3
3038 return false;
3039 }
3040 break;
3041 case Op_MaxHF:
3042 case Op_MinHF:
3043 if (!VM_Version::supports_avx512vlbw()) {
3044 return false;
3045 } // fallthrough
3046 case Op_AddHF:
3047 case Op_DivHF:
3048 case Op_FmaHF:
3049 case Op_MulHF:
3050 case Op_ReinterpretS2HF:
3051 case Op_ReinterpretHF2S:
3052 case Op_SubHF:
3053 case Op_SqrtHF:
3054 if (!VM_Version::supports_avx512_fp16()) {
3055 return false;
3056 }
3057 break;
3058 case Op_VectorLoadShuffle:
3059 case Op_VectorRearrange:
3060 case Op_MulReductionVI:
3061 if (UseSSE < 4) { // requires at least SSE4
3062 return false;
3063 }
3064 break;
3065 case Op_IsInfiniteF:
3066 case Op_IsInfiniteD:
3067 if (!VM_Version::supports_avx512dq()) {
3068 return false;
3069 }
3070 break;
3071 case Op_SqrtVD:
3072 case Op_SqrtVF:
3073 case Op_VectorMaskCmp:
3074 case Op_VectorCastB2X:
3075 case Op_VectorCastS2X:
3076 case Op_VectorCastI2X:
3077 case Op_VectorCastL2X:
3078 case Op_VectorCastF2X:
3079 case Op_VectorCastD2X:
3080 case Op_VectorUCastB2X:
3081 case Op_VectorUCastS2X:
3082 case Op_VectorUCastI2X:
3083 case Op_VectorMaskCast:
3084 if (UseAVX < 1) { // enabled for AVX only
3085 return false;
3086 }
3087 break;
3088 case Op_PopulateIndex:
3089 if (UseAVX < 2) {
3090 return false;
3091 }
3092 break;
3093 case Op_RoundVF:
3094 if (UseAVX < 2) { // enabled for AVX2 only
3095 return false;
3096 }
3097 break;
3098 case Op_RoundVD:
3099 if (UseAVX < 3) {
3100 return false; // enabled for AVX3 only
3101 }
3102 break;
3103 case Op_CompareAndSwapL:
3104 case Op_CompareAndSwapP:
3105 break;
3106 case Op_StrIndexOf:
3107 if (!UseSSE42Intrinsics) {
3108 return false;
3109 }
3110 break;
3111 case Op_StrIndexOfChar:
3112 if (!UseSSE42Intrinsics) {
3113 return false;
3114 }
3115 break;
3116 case Op_OnSpinWait:
3117 if (VM_Version::supports_on_spin_wait() == false) {
3118 return false;
3119 }
3120 break;
3121 case Op_MulVB:
3122 case Op_LShiftVB:
3123 case Op_RShiftVB:
3124 case Op_URShiftVB:
3125 case Op_VectorInsert:
3126 case Op_VectorLoadMask:
3127 case Op_VectorStoreMask:
3128 case Op_VectorBlend:
3129 if (UseSSE < 4) {
3130 return false;
3131 }
3132 break;
3133 case Op_MaxD:
3134 case Op_MaxF:
3135 case Op_MinD:
3136 case Op_MinF:
3137 if (UseAVX < 1) { // enabled for AVX only
3138 return false;
3139 }
3140 break;
3141 case Op_CacheWB:
3142 case Op_CacheWBPreSync:
3143 case Op_CacheWBPostSync:
3144 if (!VM_Version::supports_data_cache_line_flush()) {
3145 return false;
3146 }
3147 break;
3148 case Op_ExtractB:
3149 case Op_ExtractL:
3150 case Op_ExtractI:
3151 case Op_RoundDoubleMode:
3152 if (UseSSE < 4) {
3153 return false;
3154 }
3155 break;
3156 case Op_RoundDoubleModeV:
3157 if (VM_Version::supports_avx() == false) {
3158 return false; // 128bit vroundpd is not available
3159 }
3160 break;
3161 case Op_LoadVectorGather:
3162 case Op_LoadVectorGatherMasked:
3163 if (UseAVX < 2) {
3164 return false;
3165 }
3166 break;
3167 case Op_FmaF:
3168 case Op_FmaD:
3169 case Op_FmaVD:
3170 case Op_FmaVF:
3171 if (!UseFMA) {
3172 return false;
3173 }
3174 break;
3175 case Op_MacroLogicV:
3176 if (UseAVX < 3 || !UseVectorMacroLogic) {
3177 return false;
3178 }
3179 break;
3180
3181 case Op_VectorCmpMasked:
3182 case Op_VectorMaskGen:
3183 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3184 return false;
3185 }
3186 break;
3187 case Op_VectorMaskFirstTrue:
3188 case Op_VectorMaskLastTrue:
3189 case Op_VectorMaskTrueCount:
3190 case Op_VectorMaskToLong:
3191 if (UseAVX < 1) {
3192 return false;
3193 }
3194 break;
3195 case Op_RoundF:
3196 case Op_RoundD:
3197 break;
3198 case Op_CopySignD:
3199 case Op_CopySignF:
3200 if (UseAVX < 3) {
3201 return false;
3202 }
3203 if (!VM_Version::supports_avx512vl()) {
3204 return false;
3205 }
3206 break;
3207 case Op_CompressBits:
3208 case Op_ExpandBits:
3209 if (!VM_Version::supports_bmi2()) {
3210 return false;
3211 }
3212 break;
3213 case Op_CompressM:
3214 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3215 return false;
3216 }
3217 break;
3218 case Op_ConvF2HF:
3219 case Op_ConvHF2F:
3220 if (!VM_Version::supports_float16()) {
3221 return false;
3222 }
3223 break;
3224 case Op_VectorCastF2HF:
3225 case Op_VectorCastHF2F:
3226 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3227 return false;
3228 }
3229 break;
3230 }
3231 return true; // Match rules are supported by default.
3232 }
3233
3234 //------------------------------------------------------------------------
3235
3236 static inline bool is_pop_count_instr_target(BasicType bt) {
3237 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3238 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3239 }
3240
3241 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3242 return match_rule_supported_vector(opcode, vlen, bt);
3243 }
3244
3245 // Identify extra cases that we might want to provide match rules for vector nodes and
3246 // other intrinsics guarded with vector length (vlen) and element type (bt).
3247 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3248 if (!match_rule_supported(opcode)) {
3249 return false;
3250 }
3251 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3252 // * SSE2 supports 128bit vectors for all types;
3253 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3254 // * AVX2 supports 256bit vectors for all types;
3255 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3256 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3257 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3258 // And MaxVectorSize is taken into account as well.
3259 if (!vector_size_supported(bt, vlen)) {
3260 return false;
3261 }
3262 // Special cases which require vector length follow:
3263 // * implementation limitations
3264 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3265 // * 128bit vroundpd instruction is present only in AVX1
3266 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3267 switch (opcode) {
3268 case Op_MaxVHF:
3269 case Op_MinVHF:
3270 if (!VM_Version::supports_avx512bw()) {
3271 return false;
3272 }
3273 case Op_AddVHF:
3274 case Op_DivVHF:
3275 case Op_FmaVHF:
3276 case Op_MulVHF:
3277 case Op_SubVHF:
3278 case Op_SqrtVHF:
3279 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3280 return false;
3281 }
3282 if (!VM_Version::supports_avx512_fp16()) {
3283 return false;
3284 }
3285 break;
3286 case Op_AbsVF:
3287 case Op_NegVF:
3288 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3289 return false; // 512bit vandps and vxorps are not available
3290 }
3291 break;
3292 case Op_AbsVD:
3293 case Op_NegVD:
3294 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3295 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3296 }
3297 break;
3298 case Op_RotateRightV:
3299 case Op_RotateLeftV:
3300 if (bt != T_INT && bt != T_LONG) {
3301 return false;
3302 } // fallthrough
3303 case Op_MacroLogicV:
3304 if (!VM_Version::supports_evex() ||
3305 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3306 return false;
3307 }
3308 break;
3309 case Op_ClearArray:
3310 case Op_VectorMaskGen:
3311 case Op_VectorCmpMasked:
3312 if (!VM_Version::supports_avx512bw()) {
3313 return false;
3314 }
3315 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3316 return false;
3317 }
3318 break;
3319 case Op_LoadVectorMasked:
3320 case Op_StoreVectorMasked:
3321 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3322 return false;
3323 }
3324 break;
3325 case Op_UMinV:
3326 case Op_UMaxV:
3327 if (UseAVX == 0) {
3328 return false;
3329 }
3330 break;
3331 case Op_UMinReductionV:
3332 case Op_UMaxReductionV:
3333 if (UseAVX == 0) {
3334 return false;
3335 }
3336 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3337 return false;
3338 }
3339 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3340 return false;
3341 }
3342 break;
3343 case Op_MaxV:
3344 case Op_MinV:
3345 if (UseSSE < 4 && is_integral_type(bt)) {
3346 return false;
3347 }
3348 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3349 // Float/Double intrinsics are enabled for AVX family currently.
3350 if (UseAVX == 0) {
3351 return false;
3352 }
3353 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3354 return false;
3355 }
3356 }
3357 break;
3358 case Op_CallLeafVector:
3359 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3360 return false;
3361 }
3362 break;
3363 case Op_AddReductionVI:
3364 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3365 return false;
3366 }
3367 // fallthrough
3368 case Op_AndReductionV:
3369 case Op_OrReductionV:
3370 case Op_XorReductionV:
3371 if (is_subword_type(bt) && (UseSSE < 4)) {
3372 return false;
3373 }
3374 break;
3375 case Op_MinReductionV:
3376 case Op_MaxReductionV:
3377 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3378 return false;
3379 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3380 return false;
3381 }
3382 // Float/Double intrinsics enabled for AVX family.
3383 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3384 return false;
3385 }
3386 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3387 return false;
3388 }
3389 break;
3390 case Op_VectorBlend:
3391 if (UseAVX == 0 && size_in_bits < 128) {
3392 return false;
3393 }
3394 break;
3395 case Op_VectorTest:
3396 if (UseSSE < 4) {
3397 return false; // Implementation limitation
3398 } else if (size_in_bits < 32) {
3399 return false; // Implementation limitation
3400 }
3401 break;
3402 case Op_VectorLoadShuffle:
3403 case Op_VectorRearrange:
3404 if(vlen == 2) {
3405 return false; // Implementation limitation due to how shuffle is loaded
3406 } else if (size_in_bits == 256 && UseAVX < 2) {
3407 return false; // Implementation limitation
3408 }
3409 break;
3410 case Op_VectorLoadMask:
3411 case Op_VectorMaskCast:
3412 if (size_in_bits == 256 && UseAVX < 2) {
3413 return false; // Implementation limitation
3414 }
3415 // fallthrough
3416 case Op_VectorStoreMask:
3417 if (vlen == 2) {
3418 return false; // Implementation limitation
3419 }
3420 break;
3421 case Op_PopulateIndex:
3422 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3423 return false;
3424 }
3425 break;
3426 case Op_VectorCastB2X:
3427 case Op_VectorCastS2X:
3428 case Op_VectorCastI2X:
3429 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3430 return false;
3431 }
3432 break;
3433 case Op_VectorCastL2X:
3434 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3435 return false;
3436 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3437 return false;
3438 }
3439 break;
3440 case Op_VectorCastF2X: {
3441 // As per JLS section 5.1.3 narrowing conversion to sub-word types
3442 // happen after intermediate conversion to integer and special handling
3443 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
3444 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3445 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3446 return false;
3447 }
3448 }
3449 // fallthrough
3450 case Op_VectorCastD2X:
3451 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3452 return false;
3453 }
3454 break;
3455 case Op_VectorCastF2HF:
3456 case Op_VectorCastHF2F:
3457 if (!VM_Version::supports_f16c() &&
3458 ((!VM_Version::supports_evex() ||
3459 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3460 return false;
3461 }
3462 break;
3463 case Op_RoundVD:
3464 if (!VM_Version::supports_avx512dq()) {
3465 return false;
3466 }
3467 break;
3468 case Op_MulReductionVI:
3469 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3470 return false;
3471 }
3472 break;
3473 case Op_LoadVectorGatherMasked:
3474 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3475 return false;
3476 }
3477 if (is_subword_type(bt) &&
3478 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3479 (size_in_bits < 64) ||
3480 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3481 return false;
3482 }
3483 break;
3484 case Op_StoreVectorScatterMasked:
3485 case Op_StoreVectorScatter:
3486 if (is_subword_type(bt)) {
3487 return false;
3488 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3489 return false;
3490 }
3491 // fallthrough
3492 case Op_LoadVectorGather:
3493 if (!is_subword_type(bt) && size_in_bits == 64) {
3494 return false;
3495 }
3496 if (is_subword_type(bt) && size_in_bits < 64) {
3497 return false;
3498 }
3499 break;
3500 case Op_SaturatingAddV:
3501 case Op_SaturatingSubV:
3502 if (UseAVX < 1) {
3503 return false; // Implementation limitation
3504 }
3505 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3506 return false;
3507 }
3508 break;
3509 case Op_SelectFromTwoVector:
3510 if (size_in_bits < 128) {
3511 return false;
3512 }
3513 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3514 return false;
3515 }
3516 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3517 return false;
3518 }
3519 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3520 return false;
3521 }
3522 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3523 return false;
3524 }
3525 break;
3526 case Op_MaskAll:
3527 if (!VM_Version::supports_evex()) {
3528 return false;
3529 }
3530 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3531 return false;
3532 }
3533 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3534 return false;
3535 }
3536 break;
3537 case Op_VectorMaskCmp:
3538 if (vlen < 2 || size_in_bits < 32) {
3539 return false;
3540 }
3541 break;
3542 case Op_CompressM:
3543 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3544 return false;
3545 }
3546 break;
3547 case Op_CompressV:
3548 case Op_ExpandV:
3549 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3550 return false;
3551 }
3552 if (size_in_bits < 128 ) {
3553 return false;
3554 }
3555 case Op_VectorLongToMask:
3556 if (UseAVX < 1) {
3557 return false;
3558 }
3559 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3560 return false;
3561 }
3562 break;
3563 case Op_SignumVD:
3564 case Op_SignumVF:
3565 if (UseAVX < 1) {
3566 return false;
3567 }
3568 break;
3569 case Op_PopCountVI:
3570 case Op_PopCountVL: {
3571 if (!is_pop_count_instr_target(bt) &&
3572 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3573 return false;
3574 }
3575 }
3576 break;
3577 case Op_ReverseV:
3578 case Op_ReverseBytesV:
3579 if (UseAVX < 2) {
3580 return false;
3581 }
3582 break;
3583 case Op_CountTrailingZerosV:
3584 case Op_CountLeadingZerosV:
3585 if (UseAVX < 2) {
3586 return false;
3587 }
3588 break;
3589 }
3590 return true; // Per default match rules are supported.
3591 }
3592
3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3594 // ADLC based match_rule_supported routine checks for the existence of pattern based
3595 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
3596 // of their non-masked counterpart with mask edge being the differentiator.
3597 // This routine does a strict check on the existence of masked operation patterns
3598 // by returning a default false value for all the other opcodes apart from the
3599 // ones whose masked instruction patterns are defined in this file.
3600 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3601 return false;
3602 }
3603
3604 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3605 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3606 return false;
3607 }
3608 switch(opcode) {
3609 // Unary masked operations
3610 case Op_AbsVB:
3611 case Op_AbsVS:
3612 if(!VM_Version::supports_avx512bw()) {
3613 return false; // Implementation limitation
3614 }
3615 case Op_AbsVI:
3616 case Op_AbsVL:
3617 return true;
3618
3619 // Ternary masked operations
3620 case Op_FmaVF:
3621 case Op_FmaVD:
3622 return true;
3623
3624 case Op_MacroLogicV:
3625 if(bt != T_INT && bt != T_LONG) {
3626 return false;
3627 }
3628 return true;
3629
3630 // Binary masked operations
3631 case Op_AddVB:
3632 case Op_AddVS:
3633 case Op_SubVB:
3634 case Op_SubVS:
3635 case Op_MulVS:
3636 case Op_LShiftVS:
3637 case Op_RShiftVS:
3638 case Op_URShiftVS:
3639 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3640 if (!VM_Version::supports_avx512bw()) {
3641 return false; // Implementation limitation
3642 }
3643 return true;
3644
3645 case Op_MulVL:
3646 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3647 if (!VM_Version::supports_avx512dq()) {
3648 return false; // Implementation limitation
3649 }
3650 return true;
3651
3652 case Op_AndV:
3653 case Op_OrV:
3654 case Op_XorV:
3655 case Op_RotateRightV:
3656 case Op_RotateLeftV:
3657 if (bt != T_INT && bt != T_LONG) {
3658 return false; // Implementation limitation
3659 }
3660 return true;
3661
3662 case Op_VectorLoadMask:
3663 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3664 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3665 return false;
3666 }
3667 return true;
3668
3669 case Op_AddVI:
3670 case Op_AddVL:
3671 case Op_AddVF:
3672 case Op_AddVD:
3673 case Op_SubVI:
3674 case Op_SubVL:
3675 case Op_SubVF:
3676 case Op_SubVD:
3677 case Op_MulVI:
3678 case Op_MulVF:
3679 case Op_MulVD:
3680 case Op_DivVF:
3681 case Op_DivVD:
3682 case Op_SqrtVF:
3683 case Op_SqrtVD:
3684 case Op_LShiftVI:
3685 case Op_LShiftVL:
3686 case Op_RShiftVI:
3687 case Op_RShiftVL:
3688 case Op_URShiftVI:
3689 case Op_URShiftVL:
3690 case Op_LoadVectorMasked:
3691 case Op_StoreVectorMasked:
3692 case Op_LoadVectorGatherMasked:
3693 case Op_StoreVectorScatterMasked:
3694 return true;
3695
3696 case Op_UMinV:
3697 case Op_UMaxV:
3698 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3699 return false;
3700 } // fallthrough
3701 case Op_MaxV:
3702 case Op_MinV:
3703 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3704 return false; // Implementation limitation
3705 }
3706 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3707 return false; // Implementation limitation
3708 }
3709 return true;
3710 case Op_SaturatingAddV:
3711 case Op_SaturatingSubV:
3712 if (!is_subword_type(bt)) {
3713 return false;
3714 }
3715 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3716 return false; // Implementation limitation
3717 }
3718 return true;
3719
3720 case Op_VectorMaskCmp:
3721 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3722 return false; // Implementation limitation
3723 }
3724 return true;
3725
3726 case Op_VectorRearrange:
3727 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3728 return false; // Implementation limitation
3729 }
3730 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3731 return false; // Implementation limitation
3732 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3733 return false; // Implementation limitation
3734 }
3735 return true;
3736
3737 // Binary Logical operations
3738 case Op_AndVMask:
3739 case Op_OrVMask:
3740 case Op_XorVMask:
3741 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3742 return false; // Implementation limitation
3743 }
3744 return true;
3745
3746 case Op_PopCountVI:
3747 case Op_PopCountVL:
3748 if (!is_pop_count_instr_target(bt)) {
3749 return false;
3750 }
3751 return true;
3752
3753 case Op_MaskAll:
3754 return true;
3755
3756 case Op_CountLeadingZerosV:
3757 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3758 return true;
3759 }
3760 default:
3761 return false;
3762 }
3763 }
3764
3765 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3766 return false;
3767 }
3768
3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3771 switch (elem_bt) {
3772 case T_BYTE: return false;
3773 case T_SHORT: return !VM_Version::supports_avx512bw();
3774 case T_INT: return !VM_Version::supports_avx();
3775 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3776 default:
3777 ShouldNotReachHere();
3778 return false;
3779 }
3780 }
3781
3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3783 // Prefer predicate if the mask type is "TypePVectMask".
3784 return vt->isa_pvectmask() != nullptr;
3785 }
3786
3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3788 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3789 bool legacy = (generic_opnd->opcode() == LEGVEC);
3790 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3791 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3792 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3793 return new legVecZOper();
3794 }
3795 if (legacy) {
3796 switch (ideal_reg) {
3797 case Op_VecS: return new legVecSOper();
3798 case Op_VecD: return new legVecDOper();
3799 case Op_VecX: return new legVecXOper();
3800 case Op_VecY: return new legVecYOper();
3801 case Op_VecZ: return new legVecZOper();
3802 }
3803 } else {
3804 switch (ideal_reg) {
3805 case Op_VecS: return new vecSOper();
3806 case Op_VecD: return new vecDOper();
3807 case Op_VecX: return new vecXOper();
3808 case Op_VecY: return new vecYOper();
3809 case Op_VecZ: return new vecZOper();
3810 }
3811 }
3812 ShouldNotReachHere();
3813 return nullptr;
3814 }
3815
3816 bool Matcher::is_reg2reg_move(MachNode* m) {
3817 switch (m->rule()) {
3818 case MoveVec2Leg_rule:
3819 case MoveLeg2Vec_rule:
3820 case MoveF2VL_rule:
3821 case MoveF2LEG_rule:
3822 case MoveVL2F_rule:
3823 case MoveLEG2F_rule:
3824 case MoveD2VL_rule:
3825 case MoveD2LEG_rule:
3826 case MoveVL2D_rule:
3827 case MoveLEG2D_rule:
3828 return true;
3829 default:
3830 return false;
3831 }
3832 }
3833
3834 bool Matcher::is_generic_vector(MachOper* opnd) {
3835 switch (opnd->opcode()) {
3836 case VEC:
3837 case LEGVEC:
3838 return true;
3839 default:
3840 return false;
3841 }
3842 }
3843
3844 //------------------------------------------------------------------------
3845
3846 const RegMask* Matcher::predicate_reg_mask(void) {
3847 return &_VECTMASK_REG_mask;
3848 }
3849
3850 // Max vector size in bytes. 0 if not supported.
3851 int Matcher::vector_width_in_bytes(BasicType bt) {
3852 assert(is_java_primitive(bt), "only primitive type vectors");
3853 // SSE2 supports 128bit vectors for all types.
3854 // AVX2 supports 256bit vectors for all types.
3855 // AVX2/EVEX supports 512bit vectors for all types.
3856 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3857 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3858 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3859 size = (UseAVX > 2) ? 64 : 32;
3860 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3861 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3862 // Use flag to limit vector size.
3863 size = MIN2(size,(int)MaxVectorSize);
3864 // Minimum 2 values in vector (or 4 for bytes).
3865 switch (bt) {
3866 case T_DOUBLE:
3867 case T_LONG:
3868 if (size < 16) return 0;
3869 break;
3870 case T_FLOAT:
3871 case T_INT:
3872 if (size < 8) return 0;
3873 break;
3874 case T_BOOLEAN:
3875 if (size < 4) return 0;
3876 break;
3877 case T_CHAR:
3878 if (size < 4) return 0;
3879 break;
3880 case T_BYTE:
3881 if (size < 4) return 0;
3882 break;
3883 case T_SHORT:
3884 if (size < 4) return 0;
3885 break;
3886 default:
3887 ShouldNotReachHere();
3888 }
3889 return size;
3890 }
3891
3892 // Limits on vector size (number of elements) loaded into vector.
3893 int Matcher::max_vector_size(const BasicType bt) {
3894 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3895 }
3896 int Matcher::min_vector_size(const BasicType bt) {
3897 int max_size = max_vector_size(bt);
3898 // Min size which can be loaded into vector is 4 bytes.
3899 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3900 // Support for calling svml double64 vectors
3901 if (bt == T_DOUBLE) {
3902 size = 1;
3903 }
3904 return MIN2(size,max_size);
3905 }
3906
3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3908 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3909 // by default on Cascade Lake
3910 if (VM_Version::is_default_intel_cascade_lake()) {
3911 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3912 }
3913 return Matcher::max_vector_size(bt);
3914 }
3915
3916 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3917 return -1;
3918 }
3919
3920 // Vector ideal reg corresponding to specified size in bytes
3921 uint Matcher::vector_ideal_reg(int size) {
3922 assert(MaxVectorSize >= size, "");
3923 switch(size) {
3924 case 4: return Op_VecS;
3925 case 8: return Op_VecD;
3926 case 16: return Op_VecX;
3927 case 32: return Op_VecY;
3928 case 64: return Op_VecZ;
3929 }
3930 ShouldNotReachHere();
3931 return 0;
3932 }
3933
3934 // Check for shift by small constant as well
3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3936 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3937 shift->in(2)->get_int() <= 3 &&
3938 // Are there other uses besides address expressions?
3939 !matcher->is_visited(shift)) {
3940 address_visited.set(shift->_idx); // Flag as address_visited
3941 mstack.push(shift->in(2), Matcher::Visit);
3942 Node *conv = shift->in(1);
3943 // Allow Matcher to match the rule which bypass
3944 // ConvI2L operation for an array index on LP64
3945 // if the index value is positive.
3946 if (conv->Opcode() == Op_ConvI2L &&
3947 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3948 // Are there other uses besides address expressions?
3949 !matcher->is_visited(conv)) {
3950 address_visited.set(conv->_idx); // Flag as address_visited
3951 mstack.push(conv->in(1), Matcher::Pre_Visit);
3952 } else {
3953 mstack.push(conv, Matcher::Pre_Visit);
3954 }
3955 return true;
3956 }
3957 return false;
3958 }
3959
3960 // This function identifies sub-graphs in which a 'load' node is
3961 // input to two different nodes, and such that it can be matched
3962 // with BMI instructions like blsi, blsr, etc.
3963 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3964 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3965 // refers to the same node.
3966 //
3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3968 // This is a temporary solution until we make DAGs expressible in ADL.
3969 template<typename ConType>
3970 class FusedPatternMatcher {
3971 Node* _op1_node;
3972 Node* _mop_node;
3973 int _con_op;
3974
3975 static int match_next(Node* n, int next_op, int next_op_idx) {
3976 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3977 return -1;
3978 }
3979
3980 if (next_op_idx == -1) { // n is commutative, try rotations
3981 if (n->in(1)->Opcode() == next_op) {
3982 return 1;
3983 } else if (n->in(2)->Opcode() == next_op) {
3984 return 2;
3985 }
3986 } else {
3987 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3988 if (n->in(next_op_idx)->Opcode() == next_op) {
3989 return next_op_idx;
3990 }
3991 }
3992 return -1;
3993 }
3994
3995 public:
3996 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3997 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3998
3999 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4000 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4001 typename ConType::NativeType con_value) {
4002 if (_op1_node->Opcode() != op1) {
4003 return false;
4004 }
4005 if (_mop_node->outcnt() > 2) {
4006 return false;
4007 }
4008 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4009 if (op1_op2_idx == -1) {
4010 return false;
4011 }
4012 // Memory operation must be the other edge
4013 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4014
4015 // Check that the mop node is really what we want
4016 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4017 Node* op2_node = _op1_node->in(op1_op2_idx);
4018 if (op2_node->outcnt() > 1) {
4019 return false;
4020 }
4021 assert(op2_node->Opcode() == op2, "Should be");
4022 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4023 if (op2_con_idx == -1) {
4024 return false;
4025 }
4026 // Memory operation must be the other edge
4027 int op2_mop_idx = (op2_con_idx & 1) + 1;
4028 // Check that the memory operation is the same node
4029 if (op2_node->in(op2_mop_idx) == _mop_node) {
4030 // Now check the constant
4031 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4032 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4033 return true;
4034 }
4035 }
4036 }
4037 return false;
4038 }
4039 };
4040
4041 static bool is_bmi_pattern(Node* n, Node* m) {
4042 assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
4043 if (n != nullptr && m != nullptr) {
4044 if (m->Opcode() == Op_LoadI) {
4045 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4046 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4047 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4048 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4049 } else if (m->Opcode() == Op_LoadL) {
4050 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4051 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4052 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4053 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4054 }
4055 }
4056 return false;
4057 }
4058
4059 // Should the matcher clone input 'm' of node 'n'?
4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4061 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4062 if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
4063 mstack.push(m, Visit);
4064 return true;
4065 }
4066 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4067 mstack.push(m, Visit); // m = ShiftCntV
4068 return true;
4069 }
4070 if (is_encode_and_store_pattern(n, m)) {
4071 mstack.push(m, Visit);
4072 return true;
4073 }
4074 return false;
4075 }
4076
4077 // Should the Matcher clone shifts on addressing modes, expecting them
4078 // to be subsumed into complex addressing expressions or compute them
4079 // into registers?
4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4081 Node *off = m->in(AddPNode::Offset);
4082 if (off->is_Con()) {
4083 address_visited.test_set(m->_idx); // Flag as address_visited
4084 Node *adr = m->in(AddPNode::Address);
4085
4086 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4087 // AtomicAdd is not an addressing expression.
4088 // Cheap to find it by looking for screwy base.
4089 if (adr->is_AddP() &&
4090 !adr->in(AddPNode::Base)->is_top() &&
4091 !adr->in(AddPNode::Offset)->is_Con() &&
4092 off->get_long() == (int) (off->get_long()) && // immL32
4093 // Are there other uses besides address expressions?
4094 !is_visited(adr)) {
4095 address_visited.set(adr->_idx); // Flag as address_visited
4096 Node *shift = adr->in(AddPNode::Offset);
4097 if (!clone_shift(shift, this, mstack, address_visited)) {
4098 mstack.push(shift, Pre_Visit);
4099 }
4100 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4101 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4102 } else {
4103 mstack.push(adr, Pre_Visit);
4104 }
4105
4106 // Clone X+offset as it also folds into most addressing expressions
4107 mstack.push(off, Visit);
4108 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4109 return true;
4110 } else if (clone_shift(off, this, mstack, address_visited)) {
4111 address_visited.test_set(m->_idx); // Flag as address_visited
4112 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4113 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4114 return true;
4115 }
4116 return false;
4117 }
4118
4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4120 switch (bt) {
4121 case BoolTest::eq:
4122 return Assembler::eq;
4123 case BoolTest::ne:
4124 return Assembler::neq;
4125 case BoolTest::le:
4126 case BoolTest::ule:
4127 return Assembler::le;
4128 case BoolTest::ge:
4129 case BoolTest::uge:
4130 return Assembler::nlt;
4131 case BoolTest::lt:
4132 case BoolTest::ult:
4133 return Assembler::lt;
4134 case BoolTest::gt:
4135 case BoolTest::ugt:
4136 return Assembler::nle;
4137 default : ShouldNotReachHere(); return Assembler::_false;
4138 }
4139 }
4140
4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4142 switch (bt) {
4143 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4144 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4145 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4146 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4147 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4148 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4149 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4150 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4151 }
4152 }
4153
4154 // Helper methods for MachSpillCopyNode::implementation().
4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4156 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4157 assert(ireg == Op_VecS || // 32bit vector
4158 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4159 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4160 "no non-adjacent vector moves" );
4161 if (masm) {
4162 switch (ireg) {
4163 case Op_VecS: // copy whole register
4164 case Op_VecD:
4165 case Op_VecX:
4166 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4167 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4168 } else {
4169 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4170 }
4171 break;
4172 case Op_VecY:
4173 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4174 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4175 } else {
4176 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4177 }
4178 break;
4179 case Op_VecZ:
4180 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4181 break;
4182 default:
4183 ShouldNotReachHere();
4184 }
4185 #ifndef PRODUCT
4186 } else {
4187 switch (ireg) {
4188 case Op_VecS:
4189 case Op_VecD:
4190 case Op_VecX:
4191 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4192 break;
4193 case Op_VecY:
4194 case Op_VecZ:
4195 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4196 break;
4197 default:
4198 ShouldNotReachHere();
4199 }
4200 #endif
4201 }
4202 }
4203
4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4205 int stack_offset, int reg, uint ireg, outputStream* st) {
4206 if (masm) {
4207 if (is_load) {
4208 switch (ireg) {
4209 case Op_VecS:
4210 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4211 break;
4212 case Op_VecD:
4213 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4214 break;
4215 case Op_VecX:
4216 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4217 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4218 } else {
4219 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4220 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4221 }
4222 break;
4223 case Op_VecY:
4224 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4225 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4226 } else {
4227 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4228 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4229 }
4230 break;
4231 case Op_VecZ:
4232 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4233 break;
4234 default:
4235 ShouldNotReachHere();
4236 }
4237 } else { // store
4238 switch (ireg) {
4239 case Op_VecS:
4240 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4241 break;
4242 case Op_VecD:
4243 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4244 break;
4245 case Op_VecX:
4246 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4247 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4248 }
4249 else {
4250 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4251 }
4252 break;
4253 case Op_VecY:
4254 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4255 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4256 }
4257 else {
4258 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4259 }
4260 break;
4261 case Op_VecZ:
4262 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4263 break;
4264 default:
4265 ShouldNotReachHere();
4266 }
4267 }
4268 #ifndef PRODUCT
4269 } else {
4270 if (is_load) {
4271 switch (ireg) {
4272 case Op_VecS:
4273 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4274 break;
4275 case Op_VecD:
4276 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4277 break;
4278 case Op_VecX:
4279 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4280 break;
4281 case Op_VecY:
4282 case Op_VecZ:
4283 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4284 break;
4285 default:
4286 ShouldNotReachHere();
4287 }
4288 } else { // store
4289 switch (ireg) {
4290 case Op_VecS:
4291 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4292 break;
4293 case Op_VecD:
4294 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4295 break;
4296 case Op_VecX:
4297 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4298 break;
4299 case Op_VecY:
4300 case Op_VecZ:
4301 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4302 break;
4303 default:
4304 ShouldNotReachHere();
4305 }
4306 }
4307 #endif
4308 }
4309 }
4310
4311 template <class T>
4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4313 int size = type2aelembytes(bt) * len;
4314 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4315 for (int i = 0; i < len; i++) {
4316 int offset = i * type2aelembytes(bt);
4317 switch (bt) {
4318 case T_BYTE: val->at(i) = con; break;
4319 case T_SHORT: {
4320 jshort c = con;
4321 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4322 break;
4323 }
4324 case T_INT: {
4325 jint c = con;
4326 memcpy(val->adr_at(offset), &c, sizeof(jint));
4327 break;
4328 }
4329 case T_LONG: {
4330 jlong c = con;
4331 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4332 break;
4333 }
4334 case T_FLOAT: {
4335 jfloat c = con;
4336 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4337 break;
4338 }
4339 case T_DOUBLE: {
4340 jdouble c = con;
4341 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4342 break;
4343 }
4344 default: assert(false, "%s", type2name(bt));
4345 }
4346 }
4347 return val;
4348 }
4349
4350 static inline jlong high_bit_set(BasicType bt) {
4351 switch (bt) {
4352 case T_BYTE: return 0x8080808080808080;
4353 case T_SHORT: return 0x8000800080008000;
4354 case T_INT: return 0x8000000080000000;
4355 case T_LONG: return 0x8000000000000000;
4356 default:
4357 ShouldNotReachHere();
4358 return 0;
4359 }
4360 }
4361
#ifndef PRODUCT
// Debug-only listing of the nop node: prints the mnemonic together with
// _count, reported as the number of padding bytes.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
4367
// Emits the padding by asking the assembler for a nop of _count
// (the same _count that size() reports).
void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
  __ nop(_count);
}
4371
// The size of the node is exactly _count; the register allocator argument
// is not consulted.
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}
4375
#ifndef PRODUCT
// Debug-only listing of the breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif
4381
// Emits an int3 instruction for the breakpoint node.
void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
  __ int3();
}
4385
// Size is not hard-coded here; it is delegated to the generic
// MachNode::size() implementation.
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
4389
4390 %}
4391
4392 //----------ENCODING BLOCK-----------------------------------------------------
4393 // This block specifies the encoding classes used by the compiler to
4394 // output byte streams. Encoding classes are parameterized macros
4395 // used by Machine Instruction Nodes in order to generate the bit
4396 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
4400 // which returns its register number when queried. CONST_INTER causes
4401 // an operand to generate a function which returns the value of the
4402 // constant when queried. MEMORY_INTER causes an operand to generate
4403 // four functions which return the Base Register, the Index Register,
4404 // the Scale Value, and the Offset Value of the operand when queried.
4405 // COND_INTER causes an operand to generate six functions which return
4406 // the encoding code (ie - encoding bits for the instruction)
4407 // associated with each basic boolean condition for a conditional
4408 // instruction.
4409 //
4410 // Instructions specify two basic values for encoding. Again, a
4411 // function is available to check if the constant displacement is an
4412 // oop. They use the ins_encode keyword to specify their encoding
4413 // classes (which must be a sequence of enc_class names, and their
4414 // parameters, specified in the encoding block), and they use the
4415 // opcode keyword to specify, in order, their primary, secondary, and
4416 // tertiary opcode. Only the opcode sections which a particular
4417 // instruction needs for encoding need to be specified.
4418 encode %{
enc_class cdql_enc(no_rax_rdx_RegI div)
%{
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_int
  //         reg: divisor                          -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_int
  //         rdx: remainder (= rax irem reg)       0
  //
  // In the special case the idiv instruction is skipped entirely: rax is
  // left holding min_int and rdx has been zeroed.
  //
  // Code sequence:
  //
  //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
  //    5:   75 07/08                jne    e <normal>
  //    7:   33 d2                   xor    %edx,%edx
  //  [div >= 8 -> offset + 1]
  //  [REX_B]
  //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
  //    c:   74 03/04                je     11 <done>
  // 000000000000000e <normal>:
  //    e:   99                      cltd
  //  [div >= 8 -> offset + 1]
  //  [REX_B]
  //    f:   f7 f9                   idiv   $div
  // 0000000000000011 <done>:
  Label normal;
  Label done;

  // cmp    $0x80000000,%eax
  __ cmpl(as_Register(RAX_enc), 0x80000000);

  // jne    e <normal>
  __ jccb(Assembler::notEqual, normal);

  // xor    %edx,%edx
  __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

  // cmp    $0xffffffffffffffff,$div
  __ cmpl($div$$Register, -1);

  // je     11 <done>
  __ jccb(Assembler::equal, done);

  // <normal>
  // cltd
  __ bind(normal);
  __ cdql();

  // idivl
  // <done>
  __ idivl($div$$Register);
  __ bind(done);
%}
4475
enc_class cdqq_enc(no_rax_rdx_RegL div)
%{
  // Full implementation of Java ldiv and lrem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor                          -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  //
  // In the special case the idiv instruction is skipped entirely: rax is
  // left holding min_long and rdx has been zeroed. Note that rdx is used
  // as a scratch register to hold min_long for the initial comparison.
  //
  // Code sequence:
  //
  //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
  //    7:   00 00 80
  //    a:   48 39 d0                cmp    %rdx,%rax
  //    d:   75 08                   jne    17 <normal>
  //    f:   33 d2                   xor    %edx,%edx
  //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
  //   15:   74 05                   je     1c <done>
  // 0000000000000017 <normal>:
  //   17:   48 99                   cqto
  //   19:   48 f7 f9                idiv   $div
  // 000000000000001c <done>:
  Label normal;
  Label done;

  // mov    $0x8000000000000000,%rdx
  __ mov64(as_Register(RDX_enc), 0x8000000000000000);

  // cmp    %rdx,%rax
  __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

  // jne    17 <normal>
  __ jccb(Assembler::notEqual, normal);

  // xor    %edx,%edx
  __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

  // cmp    $0xffffffffffffffff,$div
  __ cmpq($div$$Register, -1);

  // je     1c <done>
  __ jccb(Assembler::equal, done);

  // <normal>
  // cqto
  __ bind(normal);
  __ cdqq();

  // idivq (emitted here, as part of this encoding class)
  // <done>
  __ idivq($div$$Register);
  __ bind(done);
%}
4533
// Conditionally emits vzeroupper and, in debug builds, checks that the
// number of bytes emitted equals clear_avx_size().
enc_class clear_avx %{
  DEBUG_ONLY(int off0 = __ offset());
  if (generate_vzeroupper(Compile::current())) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }
  DEBUG_ONLY(int off1 = __ offset());
  assert(off1 - off0 == clear_avx_size(), "correct size prediction");
%}
4545
// Call from compiled Java code into the runtime: the target address is
// loaded into r10, called indirectly through r10, and followed by the
// post-call nop.
enc_class Java_To_Runtime(method meth) %{
  __ lea(r10, RuntimeAddress((address)$meth$$method));
  __ call(r10);
  __ post_call_nop();
%}
4551
enc_class Java_Static_Call(method meth)
%{
  // JAVA STATIC CALL
  // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
  // determine who we intended to call.
  //
  // Three cases are distinguished:
  //  - no _method: a plain runtime call to the target address;
  //  - the _ensureMaterializedForStackWalk intrinsic: the call is elided and
  //    replaced by a 5-byte nop;
  //  - otherwise: a relocated call (optimized-virtual or static relocation)
  //    plus a to-interpreter stub, shared when possible.
  // Every path that does not bail out ends with the post-call nop.
  if (!_method) {
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
  } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
    // The NOP here is purely to ensure that eliding a call to
    // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
    __ nop(5);
    __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
  } else {
    int method_index = resolved_method_index(masm);
    RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                : static_call_Relocation::spec(method_index);
    // Remember where the call starts, both as an address (for the stub)
    // and as a code offset (for the shared-stub bookkeeping).
    address mark = __ pc();
    int call_offset = __ offset();
    __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
    if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
      // Calls of the same statically bound method can share
      // a stub to the interpreter.
      __ code()->shared_stub_to_interp_for(_method, call_offset);
    } else {
      // Emit stubs for static call.
      address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
      __ clear_inst_mark();
      if (stub == nullptr) {
        // Stub emission failed: record the failure and stop emitting
        // (the post-call nop below is skipped on this path).
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  }
  __ post_call_nop();
%}
4587
// Java dynamic call: emits an inline-cache call (ic_call) to the target
// using the resolved method index, followed by the post-call nop.
enc_class Java_Dynamic_Call(method meth) %{
  __ ic_call((address)$meth$$method, resolved_method_index(masm));
  __ post_call_nop();
%}
4592
// Emitted after a call. Does nothing unless VerifyStackAtCalls is set; in
// that case it checks that the stack slot at [rsp + framesize] still holds
// the value 0xbadb100d and executes int3 if it does not.
enc_class call_epilog %{
  if (VerifyStackAtCalls) {
    // Check that stack depth is unchanged: find magic cookie on stack
    int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
    Label L;
    __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
    __ jccb(Assembler::equal, L);
    // Die if stack mismatch
    __ int3();
    __ bind(L);
  }
%}
4605
4606 %}
4607
4608 //----------FRAME--------------------------------------------------------------
4609 // Definition of frame structure and management information.
4610 //
//        S T A C K   L A Y O U T    Allocators stack-slot number
//                                   |   (to get allocators register number
//  G  Owned by    |        |        v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
4645 //
4646 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4647 // known from SELF's arguments and the Java calling convention.
4648 // Region 6-7 is determined per call site.
4649 // Note 2: If the calling convention leaves holes in the incoming argument
4650 // area, those holes are owned by SELF. Holes in the outgoing area
4651 // are owned by the CALLEE. Holes should not be necessary in the
4652 // incoming area, as the Java calling convention is completely under
4653 // the control of the AD file. Doubles can be sorted and packed to
4654 // avoid holes. Holes in the outgoing arguments may be necessary for
4655 // varargs C calling conventions.
4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4657 // even aligned with pad0 as needed.
4658 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4659 // region 6-11 is even aligned; it may be padded out more so that
4660 // the region from SP to FP meets the minimum stack alignment.
4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4662 // alignment. Region 11, pad1, may be dynamically extended so that
4663 // SP meets the minimum alignment.
4664
// Frame description: calling-convention registers, stack-slot accounting,
// the location of the return address, and where compiled Java return
// values live.
frame
%{
  // This register is part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX); // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C. Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address. Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values. Same as C for now.
  // Both tables are indexed by ideal_reg; 'lo' gives the register holding
  // the value (or its low half) and 'hi' the register holding the high half,
  // or OptoReg::Bad when the value has no high half. The first two entries
  // are unused placeholders.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num, // Op_RegN
      RAX_num, // Op_RegI
      RAX_num, // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num, // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num, // Op_RegD
      RAX_H_num // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4730
4731 //----------ATTRIBUTES---------------------------------------------------------
4732 //----------Operand Attributes-------------------------------------------------
// NOTE(review): the value in parentheses is presumably the default used when
// an operand/instruction does not specify the attribute itself — confirm
// against the ADLC documentation.
op_attrib op_cost(0); // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100); // Required cost attribute
ins_attrib ins_size(8); // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1); // Required alignment attribute (must
                             // be a power of 2) specifies the
                             // alignment that some part of the
                             // instruction (not necessarily the
                             // start) requires. If > 1, a
                             // compute_padding() function must be
                             // provided for the instruction

// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
4752
4753 //----------OPERANDS-----------------------------------------------------------
4754 // Operand definitions must precede instruction definitions for correct parsing
4755 // in the ADLC because operands constitute user defined types which are used in
4756 // instruction definitions.
4757
4758 //----------Simple Operands----------------------------------------------------
4759 // Immediate Operands
// Integer Immediate: matches any ConI node, with no restriction on its value.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4769
// Constant for test vs zero: a ConI whose value is exactly 0.
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4780
// Constant for increment: a ConI whose value is exactly 1.
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4791
// Constant for decrement: a ConI whose value is exactly -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4802
// Int constant 2: a ConI whose value is exactly 2.
operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4812
// Int constant 4: a ConI whose value is exactly 4.
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4822
// Int constant 8: a ConI whose value is exactly 8.
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4832
// Valid scale values for addressing modes: a ConI in the range [0, 3].
// No op_cost is given for this operand.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4842
// Unsigned 7-bit int immediate: a ConI in the range [0, 0x7F].
operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4852
// Signed 8-bit int immediate: a ConI in the range [-0x80, 0x7F].
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4862
// Unsigned 8-bit int immediate: a ConI in the range [0, 255].
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4872
// Signed 16-bit int immediate: a ConI in the range [-32768, 32767].
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4882
// Int Immediate non-negative: a ConI whose value is >= 0.
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4893
// Pointer Immediate: matches any ConP node, with no restriction on its value.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4903
// Null Pointer Immediate: a ConP whose pointer value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4914
// Narrow Pointer Immediate: matches any ConN node, with no restriction on
// its value.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4923
// Narrow Klass Immediate: matches any ConNKlass node.
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4931
// Narrow Null Pointer Immediate: a ConN whose narrow constant is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4941
// 31-bit Pointer Immediate: a ConP that carries no relocation
// (relocInfo::none) and whose value has no bits set at position 31 or above.
operand immP31()
%{
  predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4952
4953
// Long Immediate: matches any ConL node, with no restriction on its value.
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
4963
// Long Immediate 8-bit: a ConL in the signed 8-bit range [-0x80, 0x7F].
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4974
// Long Immediate 32-bit unsigned: a ConL whose value is unchanged by a
// round trip through 'unsigned int', i.e. it fits in 32 unsigned bits.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4985
// Long Immediate 32-bit signed: a ConL whose value is unchanged by a
// round trip through 'int', i.e. it fits in 32 signed bits.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4996
// Long Immediate power of two: a ConL for which is_power_of_2() holds when
// the value is viewed as an unsigned 64-bit quantity (julong).
operand immL_Pow2()
%{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5006
// Long Immediate whose bitwise complement is a power of two: a ConL for
// which is_power_of_2() holds on ~value viewed as julong.
operand immL_NotPow2()
%{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5016
// Long Immediate zero: a ConL whose value is exactly 0.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
5027
// Constant for increment: a ConL whose value is exactly 1.
// No op_cost is given for this operand.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5037
// Constant for decrement: a ConL whose value is exactly -1.
// No op_cost is given for this operand.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5047
// Long Immediate: low 32-bit mask, i.e. a ConL equal to 0xFFFFFFFF.
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
5058
// Int Immediate: 2^n-1, positive. The value must be > 0 and value + 1
// (computed as unsigned, juint) must satisfy is_power_of_2().
operand immI_Pow2M1()
%{
  predicate((n->get_int() > 0)
            && is_power_of_2((juint)n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
5070
// Float Immediate zero: a ConF for which jint_cast() of the value is 0.
// NOTE(review): jint_cast presumably reinterprets the float's bits, in which
// case only +0.0f qualifies and -0.0f does not — confirm.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5081
// Float Immediate: matches any ConF node, with no restriction on its value.
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5091
// Half Float Immediate: matches any ConH node, with no restriction on its
// value.
operand immH()
%{
  match(ConH);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5101
// Double Immediate zero: a ConD for which jlong_cast() of the value is 0.
// NOTE(review): jlong_cast presumably reinterprets the double's bits, in
// which case only +0.0 qualifies and -0.0 does not — confirm.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5112
// Double Immediate: matches any ConD node, with no restriction on its value.
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5122
5123 // Immediates for special shifts (sign extend)
5124
// Int constant 16: a ConI whose value is exactly 16.
// No op_cost is given for this operand.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
5134
// Int constant 24: a ConI whose value is exactly 24.
// No op_cost is given for this operand.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
5143
// Constant for byte-wide masking: a ConI whose value is exactly 255 (0xFF).
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
5153
// Constant for short-wide masking: a ConI whose value is exactly 65535
// (0xFFFF).
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
5163
// Constant for byte-wide masking (long): a ConL whose value is exactly 255
// (0xFF).
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5173
// Constant for short-wide masking (long): a ConL whose value is exactly
// 65535 (0xFFFF).
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5183
// AOT Runtime Constants Address: a ConP whose pointer value, taken as an
// address, is accepted by AOTRuntimeConstants::contains().
operand immAOTRuntimeConstantsAddress()
%{
  // Check if the address is in the range of AOT Runtime Constants
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5195
// Vector mask register operand: matches RegVectMask and is allocated in the
// vectmask_reg register class.
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
5203
// Register Operands
// General integer register operand (register class int_reg). Besides RegI it
// also accepts the fixed-register integer operands listed below.
operand rRegI() %{
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  constraint(ALLOC_IN_RC(int_reg));
  interface(REG_INTER);
  format %{ %}
%}
5220
// Special Registers
// Integer operand restricted to the int_rax_reg class; formatted as "RAX".
operand rax_RegI() %{
  match(RegI);
  match(rRegI);

  constraint(ALLOC_IN_RC(int_rax_reg));
  interface(REG_INTER);
  format %{ "RAX" %}
%}
5231
// Integer operand restricted to the int_rbx_reg class; formatted as "RBX".
operand rbx_RegI() %{
  match(RegI);
  match(rRegI);

  constraint(ALLOC_IN_RC(int_rbx_reg));
  interface(REG_INTER);
  format %{ "RBX" %}
%}
5242
// Integer operand restricted to the int_rcx_reg class; formatted as "RCX".
operand rcx_RegI() %{
  match(RegI);
  match(rRegI);

  constraint(ALLOC_IN_RC(int_rcx_reg));
  interface(REG_INTER);
  format %{ "RCX" %}
%}
5252
// Integer operand restricted to the int_rdx_reg class; formatted as "RDX".
operand rdx_RegI() %{
  match(RegI);
  match(rRegI);

  constraint(ALLOC_IN_RC(int_rdx_reg));
  interface(REG_INTER);
  format %{ "RDX" %}
%}
5262
// Integer operand restricted to the int_rdi_reg class; formatted as "RDI".
operand rdi_RegI() %{
  match(RegI);
  match(rRegI);

  constraint(ALLOC_IN_RC(int_rdi_reg));
  interface(REG_INTER);
  format %{ "RDI" %}
%}
5272
// Integer operand allocated from int_no_rax_rdx_reg. Of the fixed-register
// integer operands, only the RBX, RCX and RDI forms are accepted as matches.
operand no_rax_rdx_RegI() %{
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  interface(REG_INTER);
  format %{ %}
%}
5284
// Integer operand allocated from int_no_rbp_r13_reg. It accepts the generic
// rRegI operand as well as every fixed-register integer operand listed below.
operand no_rbp_r13_RegI() %{
  match(RegI);
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  interface(REG_INTER);
  format %{ %}
%}
5299
// Pointer Register
// Pointer operand allocated from the any_reg class. It accepts the generic
// rRegP operand and each of the fixed-register pointer operands listed below.
operand any_RegP() %{
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  constraint(ALLOC_IN_RC(any_reg));
  interface(REG_INTER);
  format %{ %}
%}
5316
// General pointer register operand, allocated from the ptr_reg class.
operand rRegP() %{
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  constraint(ALLOC_IN_RC(ptr_reg));
  interface(REG_INTER);
  format %{ %}
%}
5331
// Narrow (RegN) register operand; it shares the int_reg register class.
operand rRegN()
%{
  match(RegN);

  constraint(ALLOC_IN_RC(int_reg));
  interface(REG_INTER);
  format %{ %}
%}
5339
5340 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5341 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5342 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator for that output.
5347 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5348 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5349 // result, RBP is not included in the output of the instruction either.
5350
// Pointer operand that may never be assigned RBP, even when RBP is not
// being used to hold the frame pointer (register class ptr_reg_no_rbp).
operand no_rbp_RegP() %{
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  interface(REG_INTER);
  format %{ %}
%}
5364
// Special Registers
// Pointer operand restricted to ptr_rax_reg; used to return a pointer value.
operand rax_RegP() %{
  match(RegP);
  match(rRegP);

  constraint(ALLOC_IN_RC(ptr_rax_reg));
  interface(REG_INTER);
  format %{ %}
%}
5376
// Special Registers
// Narrow (RegN) operand restricted to int_rax_reg; used to return a
// compressed pointer value.
operand rax_RegN() %{
  match(RegN);
  match(rRegN);

  constraint(ALLOC_IN_RC(int_rax_reg));
  interface(REG_INTER);
  format %{ %}
%}
5388
// Pointer operand restricted to ptr_rbx_reg. Used in AtomicAdd.
operand rbx_RegP() %{
  match(RegP);
  match(rRegP);

  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  interface(REG_INTER);
  format %{ %}
%}
5399
// Pointer operand restricted to the ptr_rsi_reg class.
operand rsi_RegP() %{
  match(RegP);
  match(rRegP);

  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  interface(REG_INTER);
  format %{ %}
%}
5409
// Pointer operand restricted to the ptr_rbp_reg class.
operand rbp_RegP() %{
  match(RegP);
  match(rRegP);

  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  interface(REG_INTER);
  format %{ %}
%}
5419
// Pointer operand restricted to ptr_rdi_reg. Used in rep stosq.
operand rdi_RegP() %{
  match(RegP);
  match(rRegP);

  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  interface(REG_INTER);
  format %{ %}
%}
5430
// Pointer operand restricted to the ptr_r15_reg class.
operand r15_RegP() %{
  match(RegP);
  match(rRegP);

  constraint(ALLOC_IN_RC(ptr_r15_reg));
  interface(REG_INTER);
  format %{ %}
%}
5440
// General long register operand (register class long_reg). It also accepts
// the fixed RAX and RDX long operands.
operand rRegL() %{
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  constraint(ALLOC_IN_RC(long_reg));
  interface(REG_INTER);
  format %{ %}
%}
5451
// Special Registers
// Long operand allocated from the long_no_rax_rdx_reg class.
operand no_rax_rdx_RegL() %{
  match(RegL);
  match(rRegL);

  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  interface(REG_INTER);
  format %{ %}
%}
5462
// Long operand restricted to the long_rax_reg class; formatted as "RAX".
operand rax_RegL() %{
  match(RegL);
  match(rRegL);

  constraint(ALLOC_IN_RC(long_rax_reg));
  interface(REG_INTER);
  format %{ "RAX" %}
%}
5472
// Long operand restricted to the long_rcx_reg class.
operand rcx_RegL() %{
  match(RegL);
  match(rRegL);

  constraint(ALLOC_IN_RC(long_rcx_reg));
  interface(REG_INTER);
  format %{ %}
%}
5482
// Long operand restricted to the long_rdx_reg class.
operand rdx_RegL() %{
  match(RegL);
  match(rRegL);

  constraint(ALLOC_IN_RC(long_rdx_reg));
  interface(REG_INTER);
  format %{ %}
%}
5492
// Long operand restricted to the long_r11_reg class.
operand r11_RegL() %{
  match(RegL);
  match(rRegL);

  constraint(ALLOC_IN_RC(long_r11_reg));
  interface(REG_INTER);
  format %{ %}
%}
5502
// Long operand allocated from long_no_rbp_r13_reg. It accepts the generic
// rRegL operand plus the fixed RAX, RCX and RDX long operands.
operand no_rbp_r13_RegL() %{
  match(RegL);
  match(rRegL);
  match(rax_RegL);
  match(rcx_RegL);
  match(rdx_RegL);

  constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
  interface(REG_INTER);
  format %{ %}
%}
5515
// Flags register operand, used as the output of compare instructions.
// Allocated from int_flags and formatted as "RFLAGS".
operand rFlagsReg() %{
  match(RegFlags);

  constraint(ALLOC_IN_RC(int_flags));
  interface(REG_INTER);
  format %{ "RFLAGS" %}
%}
5525
// Flags register operand, used as the output of FLOATING POINT compare
// instructions. Same register class as rFlagsReg (int_flags), but a distinct
// operand type that is formatted as "RFLAGS_U".
operand rFlagsRegU() %{
  match(RegFlags);

  constraint(ALLOC_IN_RC(int_flags));
  interface(REG_INTER);
  format %{ "RFLAGS_U" %}
%}
5535
// Flags register operand formatted as "RFLAGS_U_CF". Only available when
// APX is not in use or AVX10.2 is not supported (see predicate).
operand rFlagsRegUCF()
%{
  match(RegFlags);
  predicate(!UseAPX || !VM_Version::supports_avx10_2());

  constraint(ALLOC_IN_RC(int_flags));
  interface(REG_INTER);
  format %{ "RFLAGS_U_CF" %}
%}
5544
// Flags register operand formatted as "RFLAGS_U_CFE". Only available when
// APX is in use and AVX10.2 is supported (see predicate).
operand rFlagsRegUCFE()
%{
  match(RegFlags);
  predicate(UseAPX && VM_Version::supports_avx10_2());

  constraint(ALLOC_IN_RC(int_flags));
  interface(REG_INTER);
  format %{ "RFLAGS_U_CFE" %}
%}
5553
// Float register operand, allocated from the float_reg class.
operand regF()
%{
  match(RegF);

  constraint(ALLOC_IN_RC(float_reg));
  interface(REG_INTER);
  format %{ %}
%}
5562
// Float register operand, allocated from the float_reg_legacy class.
operand legRegF()
%{
  match(RegF);

  constraint(ALLOC_IN_RC(float_reg_legacy));
  interface(REG_INTER);
  format %{ %}
%}
5571
// Float register operand, allocated from the float_reg_vl class.
operand vlRegF()
%{
  match(RegF);

  constraint(ALLOC_IN_RC(float_reg_vl));
  interface(REG_INTER);
  format %{ %}
%}
5580
// Double register operand, allocated from the double_reg class.
operand regD()
%{
  match(RegD);

  constraint(ALLOC_IN_RC(double_reg));
  interface(REG_INTER);
  format %{ %}
%}
5589
// Double register operand, allocated from the double_reg_legacy class.
operand legRegD()
%{
  match(RegD);

  constraint(ALLOC_IN_RC(double_reg_legacy));
  interface(REG_INTER);
  format %{ %}
%}
5598
// Double register operand, allocated from the double_reg_vl class.
operand vlRegD()
%{
  match(RegD);

  constraint(ALLOC_IN_RC(double_reg_vl));
  interface(REG_INTER);
  format %{ %}
%}
5607
5608 //----------Memory Operands----------------------------------------------------
5609 // Direct Memory Operand
5610 // operand direct(immP addr)
5611 // %{
5612 // match(addr);
5613
5614 // format %{ "[$addr]" %}
5615 // interface(MEMORY_INTER) %{
5616 // base(0xFFFFFFFF);
5617 // index(0x4);
5618 // scale(0x0);
5619 // disp($addr);
5620 // %}
5621 // %}
5622
// Indirect Memory Operand: address is the pointer register itself, [reg].
operand indirect(any_RegP reg) %{
  match(reg);
  constraint(ALLOC_IN_RC(ptr_reg));

  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp(0x0);
  %}
  format %{ "[$reg]" %}
%}
5637
// Indirect Memory Plus Short Offset Operand: [reg + off], off being an immL8.
operand indOffset8(any_RegP reg, immL8 off) %{
  match(AddP reg off);
  constraint(ALLOC_IN_RC(ptr_reg));

  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
  format %{ "[$reg + $off (8-bit)]" %}
%}
5652
// Indirect Memory Plus Long Offset Operand: [reg + off], off being an immL32.
operand indOffset32(any_RegP reg, immL32 off) %{
  match(AddP reg off);
  constraint(ALLOC_IN_RC(ptr_reg));

  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
  format %{ "[$reg + $off (32-bit)]" %}
%}
5667
// Indirect Memory Plus Index Register Plus Offset Operand:
// [reg + lreg + off] with an unscaled long index.
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off) %{
  match(AddP (AddP reg lreg) off);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
  format %{ "[$reg + $off + $lreg]" %}
%}
5683
// Indirect Memory Plus Index Register Operand: [reg + lreg], with an
// unscaled long index and no displacement.
operand indIndex(any_RegP reg, rRegL lreg) %{
  match(AddP reg lreg);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
  format %{ "[$reg + $lreg]" %}
%}
5699
// Indirect Memory Times Scale Plus Index Register:
// [reg + (lreg << scale)] with no displacement.
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale) %{
  match(AddP reg (LShiftL lreg scale));
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
  format %{ "[$reg + $lreg << $scale]" %}
%}
5715
// Indirect Memory Times Scale Plus Positive Index Register:
// [reg + (ConvI2L(idx) << scale)] with no displacement.
operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale) %{
  match(AddP reg (LShiftL (ConvI2L idx) scale));
  // Requires the long type's lower bound (_lo) of the node at in(3)->in(1)
  // to be non-negative; per the match shape this is presumably the ConvI2L.
  predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp(0x0);
  %}
  format %{ "[$reg + pos $idx << $scale]" %}
%}
5731
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
// [reg + (lreg << scale) + off].
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale) %{
  match(AddP (AddP reg (LShiftL lreg scale)) off);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
  format %{ "[$reg + $off + $lreg << $scale]" %}
%}
5747
// Indirect Memory Plus Positive Index Register Plus Offset Operand:
// [reg + ConvI2L(idx) + off] with an unscaled int index.
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx) %{
  match(AddP (AddP reg (ConvI2L idx)) off);
  // Requires the long type's lower bound (_lo) of the node at in(2)->in(3)
  // to be non-negative; per the match shape this is presumably the ConvI2L.
  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
  format %{ "[$reg + $off + $idx]" %}
%}
5764
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset
// Operand: [reg + (ConvI2L(idx) << scale) + off].
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) %{
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
  // Requires the long type's lower bound (_lo) of the node at
  // in(2)->in(3)->in(1) to be non-negative; per the match shape this is
  // presumably the ConvI2L.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
  format %{ "[$reg + $off + $idx << $scale]" %}
%}
5781
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off)
%{
  match(AddP (DecodeN reg) off);
  // Only usable with compressed oops whose shift equals Address::times_8,
  // which the fixed scale(0x3) below relies on.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
  format %{ "[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
%}
5799
// Indirect Memory Operand (narrow base): [DecodeN(reg)]. Only applicable
// when the compressed-oop shift is 0.
operand indirectNarrow(rRegN reg) %{
  match(DecodeN reg);
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp(0x0);
  %}
  format %{ "[$reg]" %}
%}
5815
// Indirect Memory Plus Short Offset Operand (narrow base):
// [DecodeN(reg) + off], off being an immL8. Requires a compressed-oop shift of 0.
operand indOffset8Narrow(rRegN reg, immL8 off) %{
  match(AddP (DecodeN reg) off);
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
  format %{ "[$reg + $off (8-bit)]" %}
%}
5831
// Indirect Memory Plus Long Offset Operand (narrow base):
// [DecodeN(reg) + off], off being an immL32. Requires a compressed-oop shift of 0.
operand indOffset32Narrow(rRegN reg, immL32 off) %{
  match(AddP (DecodeN reg) off);
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
  format %{ "[$reg + $off (32-bit)]" %}
%}
5847
// Indirect Memory Plus Index Register Plus Offset Operand (narrow base):
// [DecodeN(reg) + lreg + off]. Requires a compressed-oop shift of 0.
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off) %{
  match(AddP (AddP (DecodeN reg) lreg) off);
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
  format %{ "[$reg + $off + $lreg]" %}
%}
5864
// Indirect Memory Plus Index Register Operand (narrow base):
// [DecodeN(reg) + lreg], no displacement. Requires a compressed-oop shift of 0.
operand indIndexNarrow(rRegN reg, rRegL lreg) %{
  match(AddP (DecodeN reg) lreg);
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
  format %{ "[$reg + $lreg]" %}
%}
5881
// Indirect Memory Times Scale Plus Index Register (narrow base):
// [DecodeN(reg) + (lreg << scale)]. Requires a compressed-oop shift of 0.
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale) %{
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
  format %{ "[$reg + $lreg << $scale]" %}
%}
5898
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// (narrow base): [DecodeN(reg) + (lreg << scale) + off].
// Requires a compressed-oop shift of 0.
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale) %{
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
  format %{ "[$reg + $off + $lreg << $scale]" %}
%}
5915
// Indirect Memory Plus Positive Index Register Plus Offset Operand
// (narrow base): [DecodeN(reg) + ConvI2L(idx) + off].
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx) %{
  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
  // Requires a compressed-oop shift of 0 and a non-negative lower bound (_lo)
  // on the long type of the node at in(2)->in(3); per the match shape this is
  // presumably the ConvI2L.
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
  format %{ "[$reg + $off + $idx]" %}
%}
5932
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset
// Operand (narrow base): [DecodeN(reg) + (ConvI2L(idx) << scale) + off].
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale) %{
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
  // Requires a compressed-oop shift of 0 and a non-negative lower bound (_lo)
  // on the long type of the node at in(2)->in(3)->in(1); per the match shape
  // this is presumably the ConvI2L.
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));

  op_cost(10);
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
  format %{ "[$reg + $off + $idx << $scale]" %}
%}
5949
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - used to load and store temporary values on the stack
//                      whenever a match requires a value to flow through
//                      memory. This is the pointer (sRegP) variant.
operand stackSlotP(sRegP reg) %{
  // No match rule because this operand is only generated in matching
  constraint(ALLOC_IN_RC(stack_slots));

  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
  format %{ "[$reg]" %}
%}
5967
// Stack slot operand for int (sRegI) values, addressed as an offset from RSP.
operand stackSlotI(sRegI reg) %{
  // No match rule because this operand is only generated in matching
  constraint(ALLOC_IN_RC(stack_slots));

  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
  format %{ "[$reg]" %}
%}
5981
// Stack slot operand for float (sRegF) values, addressed as an offset from RSP.
operand stackSlotF(sRegF reg) %{
  // No match rule because this operand is only generated in matching
  constraint(ALLOC_IN_RC(stack_slots));

  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
  format %{ "[$reg]" %}
%}
5995
// Stack slot operand for double (sRegD) values, addressed as an offset from RSP.
operand stackSlotD(sRegD reg) %{
  // No match rule because this operand is only generated in matching
  constraint(ALLOC_IN_RC(stack_slots));

  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
  format %{ "[$reg]" %}
%}
// Stack slot operand for long (sRegL) values, addressed as an offset from RSP.
operand stackSlotL(sRegL reg) %{
  // No match rule because this operand is only generated in matching
  constraint(ALLOC_IN_RC(stack_slots));

  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
  format %{ "[$reg]" %}
%}
6022
6023 //----------Conditional Branch Operands----------------------------------------
6024 // Comparison Op - This is the operation of the comparison, and is limited to
6025 // the following set of codes:
6026 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6027 //
6028 // Other attributes of the comparison, such as unsignedness, are specified
6029 // by the comparison instruction that sets a condition code flags register.
6030 // That result is represented by a flags operand whose subtype is appropriate
6031 // to the unsignedness (etc.) of the comparison.
6032 //
6033 // Later, the instruction which matches both the Comparison Op (a Bool) and
6034 // the flags (produced by the Cmp) specifies the coding of the comparison op
6035 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6036
// Comparison Code: maps each Bool test onto a condition-code value and
// mnemonic suffix, using the l/ge/le/g forms for the ordering tests.
operand cmpOp() %{
  match(Bool);

  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xc, "l");
    greater_equal(0xd, "ge");
    less_equal(0xe, "le");
    greater(0xf, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
  format %{ "" %}
%}
6054
// Comparison Code for unsigned compares; ordering tests use the b/ae/be/a
// forms. Also used by FP, with C2 (unordered) already turned into GT or LT;
// the other bits, C0 and C3, are turned into the Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
  format %{ "" %}
%}
6074
6075
// Floating comparisons that need no fixup for the unordered case. The
// predicate admits lt/ge/le/gt tests, plus any test whose comparison has
// identical inputs: in that case ZF is always set, so eq/ne do not need
// cmpOpUCF2. Note that equal/not_equal are encoded as "np"/"p" here.
operand cmpOpUCF()
%{
  match(Bool);
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt ||
             n->in(1)->in(1) == n->in(1)->in(2)));

  interface(COND_INTER) %{
    equal(0xb, "np");
    not_equal(0xa, "p");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
  format %{ "" %}
%}
6099
6100
// Floating comparisons that can be fixed up with extra conditional jumps.
// The predicate admits only eq/ne tests whose comparison inputs differ.
operand cmpOpUCF2()
%{
  match(Bool);
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq) &&
            n->in(1)->in(1) != n->in(1)->in(2));

  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
  format %{ "" %}
%}
6120
6121
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing into an
// instruction because the preceding comparison might be based on a three way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
// Accordingly, the table below encodes less/less_equal as "b"/"be".
operand cmpOpUCFE() %{
  match(Bool);
  predicate((UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq ||
             n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt));

  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
  format %{ "" %}
%}
6151
// Operand for bound floating point register arguments: a VecX value
// allocated from the xmm0_reg class.
operand rxmm0()
%{
  match(VecX);
  constraint(ALLOC_IN_RC(xmm0_reg));
  interface(REG_INTER);
  format %{ %}
%}
6159
6160 // Vectors
6161
// Dummy generic vector operand; should be used for all vector operands.
// It is replaced with vec[SDXYZ] during the post-selection pass.
operand vec()
%{
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  constraint(ALLOC_IN_RC(dynamic));
  interface(REG_INTER);
  format %{ %}
%}
6175
// Dummy generic legacy vector operand; should be used for all legacy vector
// operands. It is replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand legVec()
%{
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  constraint(ALLOC_IN_RC(dynamic));
  interface(REG_INTER);
  format %{ %}
%}
6191
// VecS operand (class vectors_reg_vlbwdq); replaces vec during
// post-selection cleanup.
operand vecS()
%{
  match(VecS);

  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  interface(REG_INTER);
  format %{ %}
%}
6200
// VecS operand (class vectors_reg_legacy); replaces legVec during
// post-selection cleanup.
operand legVecS()
%{
  match(VecS);

  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  interface(REG_INTER);
  format %{ %}
%}
6209
// VecD operand (class vectord_reg_vlbwdq); replaces vec during
// post-selection cleanup.
operand vecD()
%{
  match(VecD);

  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  interface(REG_INTER);
  format %{ %}
%}
6218
// VecD operand (class vectord_reg_legacy); replaces legVec during
// post-selection cleanup.
operand legVecD()
%{
  match(VecD);

  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  interface(REG_INTER);
  format %{ %}
%}
6227
// VecX operand (class vectorx_reg_vlbwdq); replaces vec during
// post-selection cleanup.
operand vecX()
%{
  match(VecX);

  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  interface(REG_INTER);
  format %{ %}
%}
6236
// VecX operand (class vectorx_reg_legacy); replaces legVec during
// post-selection cleanup.
operand legVecX()
%{
  match(VecX);

  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  interface(REG_INTER);
  format %{ %}
%}
6245
// VecY operand (class vectory_reg_vlbwdq); replaces vec during
// post-selection cleanup.
operand vecY()
%{
  match(VecY);

  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  interface(REG_INTER);
  format %{ %}
%}
6254
// VecY operand (class vectory_reg_legacy); replaces legVec during
// post-selection cleanup.
operand legVecY()
%{
  match(VecY);

  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  interface(REG_INTER);
  format %{ %}
%}
6263
// VecZ operand (class vectorz_reg); replaces vec during post-selection
// cleanup.
operand vecZ()
%{
  match(VecZ);

  constraint(ALLOC_IN_RC(vectorz_reg));
  interface(REG_INTER);
  format %{ %}
%}
6272
// VecZ operand (class vectorz_reg_legacy); replaces legVec during
// post-selection cleanup.
operand legVecZ()
%{
  match(VecZ);

  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  interface(REG_INTER);
  format %{ %}
%}
6281
6282 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
6285 // instructions for every form of operand when the instruction accepts
6286 // multiple operand types with the same basic encoding and format. The classic
6287 // case of this is memory operands.
6288
// The "memory" operand class groups every addressing-mode operand: the
// any_RegP-based forms, the compressed-oop form, and the *Narrow forms
// (rRegN base, predicated on a compressed-oop shift of 0).
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6295
6296 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
6298 pipeline %{
6299
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction sizes are measured in units of 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6308
6309 //----------RESOURCES----------------------------------------------------------
6310 // Resources are the functional units available to the machine
6311
// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 3 memory (load/store) units MS0-MS2, 1 branch unit, 1 FPU,
// 3 ALUs, only ALU0 handles mul instructions.
// DECODE, MEM and ALU are masks meaning "any one unit of that group".
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
           BR, FPU,
           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6321
6322 //----------PIPELINE DESCRIPTION-----------------------------------------------
6323 // Pipeline Description specifies the stages in the machine's pipeline
6324
// Generic P2/P3 pipeline: six stages, S0 through S5, which the pipe_class
// definitions reference for operand reads/writes and resource usage.
pipe_desc(S0, S1, S2, S3, S4, S5);
6327
6328 //----------PIPELINE CLASSES---------------------------------------------------
6329 // Pipeline Classes describe the stages in which input and output are
6330 // referenced by the hardware pipeline.
6331
6332 // Naming convention: ialu or fpu
6333 // Then: _reg
6334 // Then: _reg if there is a 2nd register
6335 // Then: _long if it's a pair of instructions implementing a long
6336 // Then: _fat if it requires the big decoder
6337 // Or: _mem if it requires the big decoder and a memory unit.
6338
// Integer ALU reg operation: a single instruction whose only operand, dst,
// is read in stage S3 and written in stage S4.
pipe_class ialu_reg(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
6348
// Long ALU reg operation: counted as two instructions, so it claims two
// decoders in S0 and two ALUs in S3.
pipe_class ialu_reg_long(rRegL dst)
%{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // any 2 alus
%}
6358
// Integer ALU reg operation using big decoder: same timing as ialu_reg, but
// decoding is restricted to D0.
pipe_class ialu_reg_fat(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}
6368
// Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
pipe_class ialu_reg_reg(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
6378
// Integer ALU reg-reg operation using big decoder.
// Note: despite the "reg_reg" name, src is declared with the memory operand
// class; unlike ialu_reg_mem, no MEM unit is reserved here.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}
6388
// Integer ALU reg-mem operation: the memory operand is read in S3 on a MEM
// unit, the ALU is used in S4, and dst is written in S5.
pipe_class ialu_reg_mem(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
6399
// Integer mem operation (prefetch): touches only a memory operand, read in
// S3; no ALU is reserved.
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}
6408
// Integer Store to Memory: the memory operand is read in S3 and the source
// register in S5.
pipe_class ialu_mem_reg(memory mem, rRegI src)
%{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
6419
6420 // // Long Store to Memory
6421 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6422 // %{
6423 // instruction_count(2);
6424 // mem : S3(read);
6425 // src : S5(read);
6426 // D0 : S0(2); // big decoder only; twice
6427 // ALU : S4(2); // any 2 alus
6428 // MEM : S3(2); // Both mems
6429 // %}
6430
// Single-instruction integer store to memory with no register source
// (the class name indicates an immediate).
pipe_class ialu_mem_imm(memory mem)
%{
    single_instruction;
    mem    : S3(read);    // address operand read in stage S3
    D0     : S0;          // big decoder only
    ALU    : S4;          // any ALU
    MEM    : S3;          // memory unit
%}
6440
// Single-instruction integer reg-reg operation that is restricted to ALU0.
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);   // result written in stage S4
    src    : S3(read);    // source read in stage S3
    D0     : S0;          // big decoder only
    ALU0   : S3;          // ALU0 only
%}
6450
// Single-instruction integer reg-mem operation that is restricted to ALU0.
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);   // result written in stage S5
    mem    : S3(read);    // memory operand read in stage S3
    D0     : S0;          // big decoder only
    ALU0   : S4;          // ALU0 only
    MEM    : S3;          // any memory unit
%}
6461
// Single-instruction integer operation on two registers whose result is the
// flags register.
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
    single_instruction;
    cr     : S4(write);   // flags written in stage S4
    src1   : S3(read);    // first source read in stage S3
    src2   : S3(read);    // second source read in stage S3
    DECODE : S0;          // any decoder
    ALU    : S3;          // any ALU
%}
6472
// Single-instruction integer operation on one register (the class name
// indicates an immediate second operand) whose result is the flags register.
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
    single_instruction;
    cr     : S4(write);   // flags written in stage S4
    src1   : S3(read);    // register source read in stage S3
    DECODE : S0;          // any decoder
    ALU    : S3;          // any ALU
%}
6482
// Single-instruction integer operation on a register and a memory operand
// whose result is the flags register.
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
    single_instruction;
    cr     : S4(write);   // flags written in stage S4
    src1   : S3(read);    // register source read in stage S3
    src2   : S3(read);    // memory source read in stage S3
    D0     : S0;          // big decoder only
    ALU    : S4;          // any ALU
    MEM    : S3;          // memory unit
%}
6494
// Four-instruction sequence reading three integer registers; no operand is
// marked as written here.
// NOTE(review): presumably used by the cmpLT-mask style idioms; confirm against the users of this class.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
    instruction_count(4);
    y      : S4(read);    // y read in stage S4
    q      : S3(read);    // q read in stage S3
    p      : S3(read);    // p read in stage S3
    DECODE : S0(4);       // four decoders, any of them
%}
6504
// Conditional move, register source to register destination, controlled by flags.
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
    single_instruction;
    dst    : S4(write);   // result written in stage S4
    src    : S3(read);    // source read in stage S3
    cr     : S3(read);    // flags read in stage S3
    DECODE : S0;          // any decoder
%}
6514
// Conditional move, memory source to register destination, controlled by flags.
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);   // result written in stage S4
    src    : S3(read);    // memory source read in stage S3
    cr     : S3(read);    // flags read in stage S3
    DECODE : S0;          // any decoder
    MEM    : S3;          // memory unit
%}
6525
// Conditional move between long registers, controlled by flags.
// Declared as a single instruction that occupies two decoders.
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
    single_instruction;
    dst    : S4(write);   // result written in stage S4
    src    : S3(read);    // source read in stage S3
    cr     : S3(read);    // flags read in stage S3
    DECODE : S0(2);       // two decoders, any of them
%}
6535
// Floating-point operation on a single register operand (two instructions).
// Only a read of dst is modeled.
pipe_class fpu_reg(regD dst)
%{
    instruction_count(2);
    dst    : S3(read);    // dst read in stage S3
    DECODE : S0(2);       // two decoders, any of them
    FPU    : S3;          // FPU in stage S3
%}
6544
// Floating-point operation, register source to register destination
// (two instructions).
pipe_class fpu_reg_reg(regD dst, regD src)
%{
    instruction_count(2);
    dst    : S4(write);   // result written in stage S4
    src    : S3(read);    // source read in stage S3
    DECODE : S0(2);       // two decoders, any of them
    FPU    : S3;          // FPU in stage S3
%}
6554
// Floating-point operation with two register sources and a register
// destination (three instructions).
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
    instruction_count(3);
    dst    : S4(write);   // result written in stage S4
    src1   : S3(read);    // first source read in stage S3
    src2   : S3(read);    // second source read in stage S3
    DECODE : S0(3);       // three decoders, any of them
    FPU    : S3(2);       // FPU, count 2, in stage S3
%}
6565
// Floating-point operation with three register sources and a register
// destination (four instructions).
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);   // result written in stage S4
    src1   : S3(read);    // first source read in stage S3
    src2   : S3(read);    // second source read in stage S3
    src3   : S3(read);    // third source read in stage S3
    DECODE : S0(4);       // four decoders, any of them
    FPU    : S3(2);       // FPU, count 2, in stage S3
%}
6577
// Floating-point operation with one memory source, two register sources
// and a register destination (four instructions).
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);   // result written in stage S4
    src1   : S3(read);    // memory source read in stage S3
    src2   : S3(read);    // register source read in stage S3
    src3   : S3(read);    // register source read in stage S3
    DECODE : S1(3);       // three decoders, any of them, in stage S1
    D0     : S0;          // big decoder only
    FPU    : S3(2);       // FPU, count 2, in stage S3
    MEM    : S3;          // memory unit
%}
6591
// Floating-point operation with a memory source and a register destination
// (two instructions).
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
    instruction_count(2);
    dst    : S5(write);   // result written in stage S5
    mem    : S3(read);    // memory operand read in stage S3
    D0     : S0;          // big decoder only
    DECODE : S1;          // any decoder, for the FPU pop
    FPU    : S4;          // FPU in stage S4
    MEM    : S3;          // any memory unit
%}
6603
// Floating-point operation with a register source, a memory source and a
// register destination (three instructions).
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
    instruction_count(3);
    dst    : S5(write);   // result written in stage S5
    src1   : S3(read);    // register source read in stage S3
    mem    : S3(read);    // memory operand read in stage S3
    D0     : S0;          // big decoder only
    DECODE : S1(2);       // two decoders, any of them, for the FPU pop
    FPU    : S4;          // FPU in stage S4
    MEM    : S3;          // any memory unit
%}
6616
// Floating-point operation with a register source and a memory operand
// (two instructions).
pipe_class fpu_mem_reg(memory mem, regD src)
%{
    instruction_count(2);
    src    : S5(read);    // register source read in stage S5
    mem    : S3(read);    // memory operand read in stage S3
    DECODE : S0;          // any decoder, for the FPU push
    D0     : S1;          // big decoder only
    FPU    : S4;          // FPU in stage S4
    MEM    : S3;          // any memory unit
%}
6628
// Floating-point operation with two register sources and a memory operand
// (three instructions).
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);    // first register source read in stage S3
    src2   : S3(read);    // second register source read in stage S3
    mem    : S3(read);    // memory operand read in stage S3
    DECODE : S0(2);       // two decoders, any of them, for the FPU push
    D0     : S1;          // big decoder only
    FPU    : S4;          // FPU in stage S4
    MEM    : S3;          // any memory unit
%}
6640
// Floating-point operation with a register source and two memory operands
// (three instructions).
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);    // register source read in stage S3
    src2   : S3(read);    // memory source read in stage S3
    mem    : S4(read);    // mem operand read in stage S4
    DECODE : S0;          // any decoder, for the FPU push
    D0     : S0(2);       // big decoder, count 2
    FPU    : S4;          // FPU in stage S4
    MEM    : S3(2);       // memory unit, count 2
%}
6652
// Two-instruction class with two memory operands and no FPU resource.
pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);    // source operand read in stage S3
    dst    : S4(read);    // dst operand read in stage S4
    D0     : S0(2);       // big decoder, count 2
    MEM    : S3(2);       // memory unit, count 2
%}
6661
// Three-instruction floating-point class with three memory operands.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);    // first source operand read in stage S3
    src2   : S3(read);    // second source operand read in stage S3
    dst    : S4(read);    // dst operand read in stage S4
    D0     : S0(3);       // big decoder, count 3
    FPU    : S4;          // FPU in stage S4
    MEM    : S3(3);       // memory unit, count 3
%}
6672
// Three-instruction floating-point class with a register source and a memory
// operand (the class name indicates an additional constant).
pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);    // register source read in stage S4
    mem    : S4(read);    // memory operand read in stage S4
    DECODE : S0;          // any decoder, for the FPU push
    D0     : S0(2);       // big decoder, count 2
    FPU    : S4;          // FPU in stage S4
    MEM    : S3(2);       // memory unit, count 2
%}
6683
// Floating-point constant load into a register (two instructions).
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);   // result written in stage S5
    D0     : S0;          // big decoder only, for the load
    DECODE : S1;          // any decoder, for the FPU pop
    FPU    : S4;          // FPU in stage S4
    MEM    : S3;          // any memory unit
%}
6694
// Floating-point operation combining a register source with a loaded constant
// (three instructions).
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);   // result written in stage S5
    src    : S3(read);    // register source read in stage S3
    D0     : S0;          // big decoder only, for the load
    DECODE : S1(2);       // two decoders, any of them, for the FPU pop
    FPU    : S4;          // FPU in stage S4
    MEM    : S3;          // any memory unit
%}
6706
// Unconditional branch to a label.
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR     : S3;          // branch unit in stage S3
%}
6713
// Conditional branch to a label, reading the flags register.
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr     : S1(read);    // flags read in stage S1
    BR     : S3;          // branch unit in stage S3
%}
6721
// Allocation idiom (compare-and-exchange): serializing, with a fixed latency of 6.
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1);
    force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);  // heap pointer read in stage S3
    DECODE   : S0(3);     // three decoders, any of them
    D0       : S2;        // big decoder in stage S2
    MEM      : S3;        // memory unit
    ALU      : S3(2);     // two ALUs
    dst      : S5(write); // result written in stage S5
    BR       : S5;        // branch unit in stage S5
%}
6735
// Catch-all class for big or slow expanded idioms: ten instructions spanning
// multiple bundles, serializing, with a fixed latency of 100.
pipe_class pipe_slow()
%{
    instruction_count(10);
    multiple_bundles;
    force_serialization;
    fixed_latency(100);
    D0     : S0(2);       // big decoder, count 2
    MEM    : S3(2);       // memory unit, count 2
%}
6744
// Pipeline class for nodes that emit nothing: zero instructions, no resources.
pipe_class empty()
%{
    instruction_count(0);
%}
6750
// Bind the Nop node (MachNop) to the empty pipeline class.
define
%{
    MachNop = empty;
%}
6756
6757 %}
6758
6759 //----------INSTRUCTIONS-------------------------------------------------------
6760 //
6761 // match -- States which machine-independent subtree may be replaced
6762 // by this instruction.
6763 // ins_cost -- The estimated cost of this instruction is used by instruction
6764 // selection to identify a minimum cost tree of machine
6765 // instructions that matches a tree of machine-independent
6766 // instructions.
6767 // format -- A string providing the disassembly for this instruction.
6768 // The value of an instruction's operand may be inserted
6769 // by referring to it with a '$' prefix.
6770 // opcode -- Three instruction opcodes may be provided. These are referred
6771 // to within an encode class as $primary, $secondary, and $tertiary
//               respectively. The primary opcode is commonly used to
6773 // indicate the type of machine instruction, while secondary
6774 // and tertiary are often used for prefix options or addressing
6775 // modes.
6776 // ins_encode -- A list of encode classes with parameters. The encode class
6777 // name must have been defined in an 'enc_class' specification
6778 // in the encode section of the architecture description.
6779
6780 // ============================================================================
6781
// Matches the Halt node. When the node is reachable, emits a stop carrying
// the message derived from the recorded halt reason; otherwise emits nothing.
instruct ShouldNotReachHere()
%{
  match(Halt);
  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      const char* halt_msg = __ code_string(_halt_reason);
      __ stop(halt_msg);
    }
  %}
  ins_pipe(pipe_slow);
%}
6793
6794 // ============================================================================
6795
6796 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder float move from regF to vlRegF. The encoding body only calls
// ShouldNotReachHere(), so this instruction is never expected to be emitted.
instruct MoveF2VL(vlRegF dst, regF src)
%{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6806
// Placeholder float move from regF to legRegF. The encoding body only calls
// ShouldNotReachHere(), so this instruction is never expected to be emitted.
instruct MoveF2LEG(legRegF dst, regF src)
%{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6816
// Placeholder float move from vlRegF to regF. The encoding body only calls
// ShouldNotReachHere(), so this instruction is never expected to be emitted.
instruct MoveVL2F(regF dst, vlRegF src)
%{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6826
// Placeholder float move from legRegF to regF. The encoding body only calls
// ShouldNotReachHere(), so this instruction is never expected to be emitted.
instruct MoveLEG2F(regF dst, legRegF src)
%{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6836
// Placeholder double move from regD to vlRegD. The encoding body only calls
// ShouldNotReachHere(), so this instruction is never expected to be emitted.
instruct MoveD2VL(vlRegD dst, regD src)
%{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6846
// Placeholder double move from regD to legRegD. The encoding body only calls
// ShouldNotReachHere(), so this instruction is never expected to be emitted.
instruct MoveD2LEG(legRegD dst, regD src)
%{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6856
// Placeholder double move from vlRegD to regD. The encoding body only calls
// ShouldNotReachHere(), so this instruction is never expected to be emitted.
instruct MoveVL2D(regD dst, vlRegD src)
%{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6866
// Placeholder double move from legRegD to regD. The encoding body only calls
// ShouldNotReachHere(), so this instruction is never expected to be emitted.
instruct MoveLEG2D(regD dst, legRegD src)
%{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6876
6877 //----------Load/Store/Move Instructions---------------------------------------
6878 //----------Load Instructions--------------------------------------------------
6879
// LoadB: load a signed 8-bit value into an int register with movsbl.
instruct loadB(rRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# byte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6894
// ConvI2L(LoadB): load a signed 8-bit value directly into a long register
// with movsbq.
instruct loadB2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(125);
  format %{ "movsbq $dst, $mem\t# byte -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6909
// LoadUB: load an unsigned 8-bit value into an int register with movzbl.
instruct loadUB(rRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "movzbl $dst, $mem\t# ubyte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6924
// ConvI2L(LoadUB): load an unsigned 8-bit value directly into a long register
// with movzbq.
instruct loadUB2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(125);
  format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6939
// ConvI2L(AndI(LoadUB, mask)) with an arbitrary 32-bit immediate mask:
// zero-extending byte load followed by an andl with the low 8 bits of the
// mask. The flags register is declared killed.
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movzbq(result, $mem$$Address);
    __ andl(result, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
6954
// LoadS: load a signed 16-bit value into an int register with movswl.
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# short" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movswl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6969
// (LoadS << 24) >> 24: a short load narrowed to a signed byte, emitted as a
// single movsbl from memory.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6981
// ConvI2L(LoadS): load a signed 16-bit value directly into a long register
// with movswq.
instruct loadS2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(125);
  format %{ "movswq $dst, $mem\t# short -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movswq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6996
// LoadUS: load an unsigned 16-bit value (ushort/char) into an int register
// with movzwl.
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "movzwl $dst, $mem\t# ushort/char" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzwl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7011
// (LoadUS << 24) >> 24: an unsigned short load narrowed to a signed byte,
// emitted as a single movsbl from memory.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7023
// ConvI2L(LoadUS): load an unsigned 16-bit value directly into a long
// register with movzwq.
instruct loadUS2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(125);
  format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzwq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7038
// ConvI2L(AndI(LoadUS, 0xFF)): the mask keeps only the low byte, so a single
// zero-extending byte load (movzbq) is emitted.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7049
// ConvI2L(AndI(LoadUS, mask)) with an arbitrary 32-bit immediate mask:
// zero-extending 16-bit load followed by an andl with the low 16 bits of the
// mask. The flags register is declared killed.
instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 16)" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movzwq(result, $mem$$Address);
    __ andl(result, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
7064
// LoadI: load a 32-bit int into an int register with movl.
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "movl $dst, $mem\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7079
// (LoadI << 24) >> 24: an int load narrowed to a signed byte, emitted as a
// single movsbl from memory.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# int -> byte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7091
// AndI(LoadI, 0xFF): an int load narrowed to an unsigned byte, emitted as a
// single movzbl from memory.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask)
%{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7103
// (LoadI << 16) >> 16: an int load narrowed to a signed short, emitted as a
// single movswl from memory.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# int -> short" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movswl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7115
// AndI(LoadI, 0xFFFF): an int load narrowed to an unsigned short/char,
// emitted as a single movzwl from memory.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask)
%{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzwl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7127
// ConvI2L(LoadI): load a 32-bit int sign-extended into a long register
// with movslq.
instruct loadI2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));

  ins_cost(125);
  format %{ "movslq $dst, $mem\t# int -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movslq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7142
// ConvI2L(AndI(LoadI, 0xFF)): emitted as a single zero-extending byte load
// (movzbq) into a long register.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7153
// ConvI2L(AndI(LoadI, 0xFFFF)): emitted as a single zero-extending 16-bit
// load (movzwq) into a long register.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movzwq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7164
// ConvI2L(AndI(LoadI, mask)) for a 31-bit unsigned mask: 32-bit load followed
// by an andl with the mask. The flags register is declared killed.
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
            "andl $dst, $mask" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movl(result, $mem$$Address);
    __ andl(result, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
7179
// AndL(ConvI2L(LoadI), 32-bit mask): an unsigned int widened to long,
// emitted as a single movl from memory.
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(125);
  format %{ "movl $dst, $mem\t# uint -> long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7194
// LoadL: load a 64-bit long into a long register with movq.
instruct loadL(rRegL dst, memory mem) %{
  match(Set dst (LoadL mem));

  ins_cost(125);
  format %{ "movq $dst, $mem\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7209
// LoadRange: load the 32-bit range value into an int register with movl.
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# range" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7222
// LoadP: load a 64-bit pointer with movq. Only selected when the load node
// carries no barrier data.
instruct loadP(rRegP dst, memory mem) %{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(125); // XXX
  format %{ "movq $dst, $mem\t# ptr" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7236
// LoadN: load a 32-bit compressed pointer with movl. Only selected when the
// load node carries no barrier data.
instruct loadN(rRegN dst, memory mem) %{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadN mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7250
7251
// LoadKlass: load a 64-bit klass pointer with movq.
instruct loadKlass(rRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125); // XXX
  format %{ "movq $dst, $mem\t# class" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7264
// LoadNKlass without compact object headers: load the 32-bit compressed
// klass pointer with movl.
instruct loadNKlass(rRegN dst, memory mem) %{
  predicate(!UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# compressed klass ptr" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7278
// LoadNKlass with compact object headers: load 32 bits with movl, then shift
// right by markWord::klass_shift_at_offset. The flags register is declared
// killed.
instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr) %{
  predicate(UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));
  effect(KILL cr);
  ins_cost(125);
  format %{
    "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
    "shrl $dst, markWord::klass_shift_at_offset"
  %}
  ins_encode %{
    Register klass_reg = $dst$$Register;
    __ movl(klass_reg, $mem$$Address);
    __ shrl(klass_reg, markWord::klass_shift_at_offset);
  %}
  ins_pipe(ialu_reg_mem);
%}
7295
// LoadF: load a float from memory into an XMM register via movflt.
instruct loadF(regF dst, memory mem) %{
  match(Set dst (LoadF mem));

  ins_cost(145); // XXX
  format %{ "movss $dst, $mem\t# float" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movflt(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7308
// LoadD when UseXmmLoadAndClearUpper is off: load a double from memory into
// an XMM register via movdbl.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdbl(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7322
// LoadD when UseXmmLoadAndClearUpper is on: load a double from memory into
// an XMM register via movdbl.
instruct loadD(regD dst, memory mem) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdbl(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7335
// Materialize an AOT runtime constants address into a pointer register via
// load_aotrc_address.
instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con) %{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ load_aotrc_address(dst_reg, (address)$con$$constant);
  %}
  ins_pipe(ialu_reg_fat);
%}
7348
// Float Math.min / Math.max (MinF and MaxF) on AVX10.2 hardware for
// non-reduction nodes. The ideal opcode is forwarded to sminmax_fp_avx10_2.
instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b) %{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));

  format %{ "minmaxF $dst, $a, $b" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    __ sminmax_fp_avx10_2(ideal_op, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7364
// Float MinF / MaxF on AVX10.2 hardware when the node is a loop reduction.
// Uses emit_fp_min_max with an integer temp; flags are declared killed.
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7381
// Float Math.min / Math.max (MinF and MaxF) with AVX but without AVX10.2, for
// non-reduction nodes. The scalar opcode is mapped to Op_MinV or Op_MaxV and
// handed to vminmax_fp at 128-bit vector length with three XMM temps.
instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);

  format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    int vector_op = Op_MaxV;
    if (this->ideal_Opcode() == Op_MinF) {
      vector_op = Op_MinV;
    }
    __ vminmax_fp(vector_op, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
                  $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
7400
// Float MinF / MaxF with AVX but without AVX10.2 when the node is a loop
// reduction. Uses emit_fp_min_max with an integer temp; flags are declared
// killed.
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
  ins_encode %{
    bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7417
// Double Math.min / Math.max (MinD and MaxD) on AVX10.2 hardware for
// non-reduction nodes. The ideal opcode is forwarded to sminmax_fp_avx10_2.
instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b) %{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));

  format %{ "minmaxD $dst, $a, $b" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    __ sminmax_fp_avx10_2(ideal_op, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7433
// Double MinD / MaxD on AVX10.2 hardware when the node is a loop reduction.
// Uses emit_fp_min_max with a general-purpose temp; flags are declared killed.
// NOTE(review): rtmp is rRegI here while minmaxD_reduction_reg declares it as
// rRegL; confirm which operand class emit_fp_min_max needs for fp_prec_dbl.
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7450
// Double Math.min / Math.max (MinD and MaxD) with AVX but without AVX10.2,
// for non-reduction nodes. The scalar opcode is mapped to Op_MinV or Op_MaxV
// and handed to vminmax_fp at 128-bit vector length with three XMM temps.
instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);

  format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    int vector_op = Op_MaxV;
    if (this->ideal_Opcode() == Op_MinD) {
      vector_op = Op_MinV;
    }
    __ vminmax_fp(vector_op, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
                  $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
7469
// Double MinD / MaxD with AVX but without AVX10.2 when the node is a loop
// reduction. Uses emit_fp_min_max with a long temp; flags are declared killed.
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7486
// Load Effective Address for the indOffset8 addressing form.
instruct leaP8(rRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr 8" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7499
// Load Effective Address for the indOffset32 addressing form.
instruct leaP32(rRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr 32" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7511
// Load Effective Address for the indIndexOffset addressing form.
instruct leaPIdxOff(rRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7523
// Load Effective Address for the indIndexScale addressing form.
instruct leaPIdxScale(rRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7535
// Load Effective Address for the indPosIndexScale addressing form.
// The format annotation reads posidxscale so that debug disassembly can be
// told apart from leaPIdxScale, in line with the posidxoff / posidxscaleoff
// annotations used by leaPPosIdxOff and leaPPosIdxScaleOff.
instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7547
// Materializes an indIndexScaleOffset address expression in $dst with leaq.
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7559
// Materializes an indPosIndexOffset address expression in $dst with leaq.
instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7571
// Materializes an indPosIndexScaleOffset address expression in $dst with leaq.
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7583
7584 // Load Effective Address which uses Narrow (32-bits) oop
// leaq over an indCompressedOopOffset operand. Only selected when compressed
// oops are enabled and their shift is non-zero.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem) %{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7597
// leaq over an indOffset8Narrow operand; requires a compressed-oop shift of 0.
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7610
// leaq over an indOffset32Narrow operand; requires a compressed-oop shift of 0.
instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7623
// leaq over an indIndexOffsetNarrow operand; requires a compressed-oop shift of 0.
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7636
// leaq over an indIndexScaleNarrow operand; requires a compressed-oop shift of 0.
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7649
// leaq over an indIndexScaleOffsetNarrow operand; requires a compressed-oop
// shift of 0.
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7662
// leaq over an indPosIndexOffsetNarrow operand; requires a compressed-oop
// shift of 0.
instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7675
// leaq over an indPosIndexScaleOffsetNarrow operand; requires a compressed-oop
// shift of 0.
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7688
// Loads an arbitrary int constant into $dst with movl.
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "movl $dst, $src\t# int" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movl(target, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7699
// Int constant 0: cheaper rule (cost 50) that xors $dst with itself. The flags
// register is declared killed.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# int" %}
  ins_encode %{
    const Register zeroed = $dst$$Register;
    __ xorl(zeroed, zeroed);
  %}
  ins_pipe(ialu_reg);
%}
7712
// Loads an arbitrary long constant into $dst via mov64 (most expensive of the
// long-constant rules at cost 150).
instruct loadConL(rRegL dst, immL src) %{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq $dst, $src\t# long" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ mov64(target, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7724
// Long constant 0: xors $dst with itself using the 32-bit xorl form. The flags
// register is declared killed.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# long" %}
  ins_encode %{
    const Register zeroed = $dst$$Register;
    __ xorl(zeroed, zeroed);
  %}
  ins_pipe(ialu_reg); // XXX
%}
7737
// Long constant that fits the immUL32 operand: loaded with a 32-bit movl.
instruct loadConUL32(rRegL dst, immUL32 src) %{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movl(target, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7749
// Long constant that fits the immL32 operand: loaded with movq and an immediate.
instruct loadConL32(rRegL dst, immL32 src) %{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movq(target, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7761
// Loads a pointer constant with mov64, passing along the operand's relocation
// and the RELOC_IMM64 format.
instruct loadConP(rRegP dst, immP con)
%{
  match(Set dst con);

  format %{ "movq $dst, $con\t# ptr" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ mov64(target, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7771
// Null pointer constant: xors $dst with itself. The flags register is declared
// killed.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# ptr" %}
  ins_encode %{
    const Register zeroed = $dst$$Register;
    __ xorl(zeroed, zeroed);
  %}
  ins_pipe(ialu_reg);
%}
7784
// Pointer constant matching immP31 (described by the format string as a
// positive 32-bit value): loaded with movl.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movl(target, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7797
// Float constant: read from the constant table with movflt.
instruct loadConF(regF dst, immF con)
%{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movflt(target, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7807
// Half-float constant (immH): read from the constant table with movflt into a
// float register.
instruct loadConH(regF dst, immH con)
%{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movflt(target, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7817
// Compressed null pointer constant: clears $dst by xoring it with itself.
// The format string names $dst for both operands, matching the xorq that is
// actually emitted (the immN0 operand never appears in the instruction).
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7827
// Non-null compressed oop constant, materialized through set_narrow_oop.
// A null constant is not expected to reach this rule and trips
// ShouldNotReachHere().
instruct loadConN(rRegN dst, immN src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed ptr" %}
  ins_encode %{
    const address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7843
// Non-null compressed klass constant, materialized through set_narrow_klass.
// A null constant is not expected to reach this rule and trips
// ShouldNotReachHere().
instruct loadConNKlass(rRegN dst, immNKlass src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    const address klass_con = (address)$src$$constant;
    if (klass_con != nullptr) {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7859
// Float constant matching immF0: produced by xorps of $dst with itself rather
// than a constant-table load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps $dst, $dst\t# float 0.0" %}
  ins_encode %{
    const XMMRegister zeroed = $dst$$XMMRegister;
    __ xorps(zeroed, zeroed);
  %}
  ins_pipe(pipe_slow);
%}
7871
7872 // Use the same format since predicate() can not be used here.
// Double constant: read from the constant table with movdbl.
instruct loadConD(regD dst, immD con)
%{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movdbl(target, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7882
// Double constant matching immD0: produced by xorpd of $dst with itself rather
// than a constant-table load.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd $dst, $dst\t# double 0.0" %}
  ins_encode %{
    const XMMRegister zeroed = $dst$$XMMRegister;
    __ xorpd(zeroed, zeroed);
  %}
  ins_pipe(pipe_slow);
%}
7894
// Reloads an int from a stack slot into a general register.
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movl(target, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7906
// Reloads a long from a stack slot into a general register.
instruct loadSSL(rRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movq(target, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7918
// Reloads a pointer from a stack slot into a general register.
instruct loadSSP(rRegP dst, stackSlotP src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movq(target, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7930
// Reloads a float from a stack slot; the slot is addressed as rsp plus the
// operand's displacement.
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movflt(target, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
7942
7943 // Use the same format since predicate() can not be used here.
// Reloads a double from a stack slot; the slot is addressed as rsp plus the
// operand's displacement.
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movdbl(target, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
7955
7956 // Prefetch instructions for allocation.
7957 // Must be safe to execute with invalid address (cannot fault).
7958
// Allocation prefetch using prefetchw; selected when AllocatePrefetchInstr is 3.
instruct prefetchAlloc( memory mem )
%{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ prefetchw(target);
  %}
  ins_pipe(ialu_mem);
%}
7970
// Allocation prefetch using prefetchnta; selected when AllocatePrefetchInstr is 0.
instruct prefetchAllocNTA( memory mem )
%{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ prefetchnta(target);
  %}
  ins_pipe(ialu_mem);
%}
7982
// Allocation prefetch using prefetcht0; selected when AllocatePrefetchInstr is 1.
instruct prefetchAllocT0( memory mem )
%{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ prefetcht0(target);
  %}
  ins_pipe(ialu_mem);
%}
7994
// Allocation prefetch using prefetcht2; selected when AllocatePrefetchInstr is 2.
instruct prefetchAllocT2( memory mem )
%{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ prefetcht2(target);
  %}
  ins_pipe(ialu_mem);
%}
8006
8007 //----------Store Instructions-------------------------------------------------
8008
8009 // Store Byte
// StoreB from an int register, emitted as movb.
instruct storeB(memory mem, rRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movb($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8021
8022 // Store Char/Short
// StoreC from an int register, emitted as movw.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw $mem, $src\t# char/short" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movw($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8034
8035 // Store Integer
// StoreI from an int register, emitted as movl.
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movl($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8047
8048 // Store Long
// StoreL from a long register, emitted as movq.
instruct storeL(memory mem, rRegL src) %{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movq($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}
8060
8061 // Store Pointer
// StoreP from a pointer register, emitted as movq. Applies only to stores
// whose barrier_data() is 0.
instruct storeP(memory mem, any_RegP src) %{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movq($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8074
// Null pointer store that writes r12 instead of an immediate. Guarded so it is
// used only with compressed oops and a null heap base (the format string notes
// R12_heapbase==0), and only for stores whose barrier_data() is 0.
instruct storeImmP0(memory mem, immP0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    const Register zero_reg = r12;
    __ movq($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8087
8088 // Store Null Pointer, mark word, or other simple pointer constant.
// StoreP of an immP31 pointer constant, emitted as movq with an immediate.
// Applies only to stores whose barrier_data() is 0.
instruct storeImmP(memory mem, immP31 src) %{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8101
8102 // Store Compressed Pointer
// StoreN from a narrow-oop register, emitted as movl. Applies only to stores
// whose barrier_data() is 0.
instruct storeN(memory mem, rRegN src) %{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movl($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8115
// StoreNKlass from a narrow register, emitted as movl.
instruct storeNKlass(memory mem, rRegN src) %{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movl($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8127
// Compressed null store that writes the low 32 bits of r12. Requires a null
// compressed-oops base (format: R12_heapbase==0) and barrier_data() == 0.
instruct storeImmN0(memory mem, immN0 zero) %{
  predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    const Register zero_reg = r12;
    __ movl($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8140
// StoreN of a compressed oop constant. A non-null constant goes through
// set_narrow_oop; a null constant is written as a plain 32-bit zero.
// Applies only to stores whose barrier_data() is 0.
instruct storeImmN(memory mem, immN src) %{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    const address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    } else {
      __ movl($mem$$Address, 0);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}
8158
// StoreNKlass of a compressed klass constant, written through set_narrow_klass.
instruct storeImmNKlass(memory mem, immNKlass src) %{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ set_narrow_klass(target, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8170
8171 // Store Integer Immediate
// Int zero store that writes r12 instead of an immediate; used only with
// compressed oops and a null heap base (format: R12_heapbase==0).
instruct storeImmI0(memory mem, immI_0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    const Register zero_reg = r12;
    __ movl($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8184
// StoreI of an int immediate, emitted as movl with an immediate.
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movl(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8196
8197 // Store Long Immediate
// Long zero store that writes r12 instead of an immediate; used only with
// compressed oops and a null heap base (format: R12_heapbase==0).
instruct storeImmL0(memory mem, immL0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    const Register zero_reg = r12;
    __ movq($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8210
// StoreL of an immL32 constant, emitted as movq with an immediate.
instruct storeImmL(memory mem, immL32 src) %{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8222
8223 // Store Short/Char Immediate
// Short/char zero store that writes r12 instead of an immediate; used only
// with compressed oops and a null heap base (format: R12_heapbase==0).
instruct storeImmC0(memory mem, immI_0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    const Register zero_reg = r12;
    __ movw($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8236
// StoreC of an immI16 constant, emitted as movw with an immediate. Enabled
// only when UseStoreImmI16 is set.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw $mem, $src\t# short/char" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movw(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8249
8250 // Store Byte Immediate
// Byte zero store that writes r12 instead of an immediate; used only with
// compressed oops and a null heap base (format: R12_heapbase==0).
// The format comment says "byte" because this rule matches StoreB and emits
// movb; the earlier "short/char" text belonged to the StoreC rule.
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8263
// StoreB of an immI8 constant, emitted as movb with an immediate.
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movb(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8275
8276 // Store Float
// StoreF from an XMM register, emitted through movflt.
instruct storeF(memory mem, regF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss $mem, $src\t# float" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    __ movflt($mem$$Address, value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8288
8289 // Store immediate Float value (it is faster than store from XMM register)
// Float 0.0 store that writes the low 32 bits of r12; used only with
// compressed oops and a null heap base (format: R12_heapbase==0).
instruct storeF0(memory mem, immF0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    const Register zero_reg = r12;
    __ movl($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8302
// StoreF of a float constant: the constant is passed through jint_cast and
// written with an integer movl.
instruct storeF_imm(memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl $mem, $src\t# float" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movl(target, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
8314
8315 // Store Double
// StoreD from an XMM register, emitted through movdbl.
instruct storeD(memory mem, regD src) %{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd $mem, $src\t# double" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    __ movdbl($mem$$Address, value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8327
8328 // Store immediate double 0.0 (it is faster than store from XMM register)
// Double 0.0 store using movq with an immediate. Its predicate is the
// complement of the one on storeD0: compressed oops off, or a non-null base.
instruct storeD0_imm(memory mem, immD0 src) %{
  predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq $mem, $src\t# double 0." %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8341
// Double 0.0 store that writes r12 instead of an immediate; used only with
// compressed oops and a null heap base (format: R12_heapbase==0).
instruct storeD0(memory mem, immD0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    const Register zero_reg = r12;
    __ movq($mem$$Address, zero_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
8354
// Spills an int register to a stack slot.
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movl($dst$$Address, value);
  %}
  ins_pipe( ialu_mem_reg );
%}
8366
// Spills a long register to a stack slot.
instruct storeSSL(stackSlotL dst, rRegL src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movq($dst$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8378
// Spills a pointer register to a stack slot.
instruct storeSSP(stackSlotP dst, rRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movq($dst$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8390
// Spills a float register to a stack slot addressed as rsp plus the operand's
// displacement.
instruct storeSSF(stackSlotF dst, regF src) %{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    __ movflt(Address(rsp, $dst$$disp), value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8402
// Spills a double register to a stack slot addressed as rsp plus the operand's
// displacement.
instruct storeSSD(stackSlotD dst, regD src) %{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    __ movdbl(Address(rsp, $dst$$disp), value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8414
// Cache line write-back for CacheWB nodes; available only when the CPU
// reports data cache line flush support. The operand must be a plain base
// register with no index and zero displacement (both asserted).
instruct cacheWB(indirect addr) %{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    const Register line_base = $addr$$base$$Register;
    __ cache_wb(Address(line_base, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}
8429
// CacheWBPreSync: calls cache_wbsync with its argument set to true.
instruct cacheWBPreSync() %{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    const bool is_pre_sync = true;
    __ cache_wbsync(is_pre_sync);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8442
// CacheWBPostSync: calls cache_wbsync with its argument set to false.
instruct cacheWBPostSync() %{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    const bool is_pre_sync = false;
    __ cache_wbsync(is_pre_sync);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8455
8456 //----------BSWAP Instructions-------------------------------------------------
// ReverseBytesI performed in place on $dst with bswapl.
instruct bytes_reverse_int(rRegI dst)
%{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ bswapl(value);
  %}
  ins_pipe( ialu_reg );
%}
8466
// ReverseBytesL performed in place on $dst with bswapq.
instruct bytes_reverse_long(rRegL dst)
%{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ bswapq(value);
  %}
  ins_pipe( ialu_reg);
%}
8476
// ReverseBytesUS in place: bswapl on the 32-bit register, then a logical
// right shift by 16.
instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr)
%{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "shrl $dst,16\n\t" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ bswapl(value);
    __ shrl(value, 16);
  %}
  ins_pipe( ialu_reg );
%}
8489
// ReverseBytesS in place: bswapl on the 32-bit register, then an arithmetic
// right shift by 16.
// The format names the shift as "sarl", the instruction the encoding emits,
// and no longer ends in a dangling line separator.
instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "sarl $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
8502
8503 //---------- Zeros Count Instructions ------------------------------------------
8504
// CountLeadingZerosI via lzcntl; used when UseCountLeadingZerosInstruction is set.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    const Register count = $dst$$Register;
    const Register input = $src$$Register;
    __ lzcntl(count, input);
  %}
  ins_pipe(ialu_reg);
%}
8516
// CountLeadingZerosI with the LoadI folded into lzcntl's memory operand.
instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    const Register count = $dst$$Register;
    __ lzcntl(count, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8528
// CountLeadingZerosI fallback built on bsrl, used when
// UseCountLeadingZerosInstruction is off.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 31" %}
  ins_encode %{
    const Register count = $dst$$Register;
    const Register input = $src$$Register;
    Label have_index;
    __ bsrl(count, input);
    // If the notZero branch is not taken, -1 is loaded so that the negl/addl
    // pair below produces BitsPerInt.
    __ jccb(Assembler::notZero, have_index);
    __ movl(count, -1);
    __ bind(have_index);
    // count = (BitsPerInt - 1) - count
    __ negl(count);
    __ addl(count, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
8553
// CountLeadingZerosL via lzcntq; the result lands in an int register.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    const Register count = $dst$$Register;
    const Register input = $src$$Register;
    __ lzcntq(count, input);
  %}
  ins_pipe(ialu_reg);
%}
8565
// CountLeadingZerosL with the LoadL folded into lzcntq's memory operand.
instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    const Register count = $dst$$Register;
    __ lzcntq(count, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8577
// CountLeadingZerosL fallback built on bsrq, used when
// UseCountLeadingZerosInstruction is off.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 63" %}
  ins_encode %{
    const Register count = $dst$$Register;
    const Register input = $src$$Register;
    Label have_index;
    __ bsrq(count, input);
    // If the notZero branch is not taken, -1 is loaded so that the negl/addl
    // pair below produces BitsPerLong.
    __ jccb(Assembler::notZero, have_index);
    __ movl(count, -1);
    __ bind(have_index);
    // count = (BitsPerLong - 1) - count
    __ negl(count);
    __ addl(count, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
8602
// CountTrailingZerosI via tzcntl; used when UseCountTrailingZerosInstruction is set.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    const Register count = $dst$$Register;
    const Register input = $src$$Register;
    __ tzcntl(count, input);
  %}
  ins_pipe(ialu_reg);
%}
8614
// CountTrailingZerosI with the LoadI folded into tzcntl's memory operand.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    const Register count = $dst$$Register;
    __ tzcntl(count, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8626
// CountTrailingZerosI fallback built on bsfl, used when
// UseCountTrailingZerosInstruction is off.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz done\n\t"
            "movl $dst, 32\n"
            "done:" %}
  ins_encode %{
    const Register count = $dst$$Register;
    Label have_index;
    __ bsfl(count, $src$$Register);
    // If the notZero branch is not taken, the result is set to BitsPerInt.
    __ jccb(Assembler::notZero, have_index);
    __ movl(count, BitsPerInt);
    __ bind(have_index);
  %}
  ins_pipe(ialu_reg);
%}
8646
// CountTrailingZerosL via tzcntq; the result lands in an int register.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    const Register count = $dst$$Register;
    const Register input = $src$$Register;
    __ tzcntq(count, input);
  %}
  ins_pipe(ialu_reg);
%}
8658
// CountTrailingZerosL with the LoadL folded into tzcntq's memory operand.
instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    const Register count = $dst$$Register;
    __ tzcntq(count, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8670
// CountTrailingZerosL fallback built on bsfq, used when
// UseCountTrailingZerosInstruction is off.
instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz done\n\t"
            "movl $dst, 64\n"
            "done:" %}
  ins_encode %{
    const Register count = $dst$$Register;
    Label have_index;
    __ bsfq(count, $src$$Register);
    // If the notZero branch is not taken, the result is set to BitsPerLong.
    __ jccb(Assembler::notZero, have_index);
    __ movl(count, BitsPerLong);
    __ bind(have_index);
  %}
  ins_pipe(ialu_reg);
%}
8690
8691 //--------------- Reverse Operation Instructions ----------------
// ReverseI without GFNI: delegates to reverseI with no XMM temporaries
// (xnoreg) and one general-purpose temp.
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    const Register result  = $dst$$Register;
    const Register input   = $src$$Register;
    const Register scratch = $rtmp$$Register;
    __ reverseI(result, input, xnoreg, xnoreg, scratch);
  %}
  ins_pipe( ialu_reg );
%}
8702
// ReverseI with GFNI: delegates to reverseI with two XMM temporaries and one
// general-purpose temp.
instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const Register result  = $dst$$Register;
    const Register input   = $src$$Register;
    const Register scratch = $rtmp$$Register;
    __ reverseI(result, input, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, scratch);
  %}
  ins_pipe( ialu_reg );
%}
8713
// ReverseL without GFNI: delegates to reverseL with no XMM temporaries
// (xnoreg) and two general-purpose temps.
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register input    = $src$$Register;
    const Register scratch1 = $rtmp1$$Register;
    const Register scratch2 = $rtmp2$$Register;
    __ reverseL(result, input, xnoreg, xnoreg, scratch1, scratch2);
  %}
  ins_pipe( ialu_reg );
%}
8724
// ReverseL when GFNI is available: two vector temporaries plus a single
// general-purpose temporary; the second GPR temp slot is passed as noreg.
instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    const XMMRegister vec_scratch1 = $xtmp1$$XMMRegister;
    const XMMRegister vec_scratch2 = $xtmp2$$XMMRegister;
    const Register gpr_scratch = $rtmp$$Register;
    __ reverseL(result, input, vec_scratch1, vec_scratch2, gpr_scratch, noreg);
  %}
  ins_pipe( ialu_reg );
%}
8735
8736 //---------- Population Count Instructions -------------------------------------
8737
// PopCountI on a register source, available only with UsePopCountInstruction.
// The flags register is declared killed.
instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    const Register count = $dst$$Register;
    const Register value = $src$$Register;
    __ popcntl(count, value);
  %}
  ins_pipe(ialu_reg);
%}
8749
// PopCountI with the LoadI folded into the instruction's memory operand.
// Available only with UsePopCountInstruction; flags are declared killed.
instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    const Register count = $dst$$Register;
    __ popcntl(count, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
8761
8762 // Note: Long.bitCount(long) returns an int.
// PopCountL on a register source: 64-bit popcnt whose result lands in an int
// register operand. Available only with UsePopCountInstruction.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    const Register count = $dst$$Register;
    const Register value = $src$$Register;
    __ popcntq(count, value);
  %}
  ins_pipe(ialu_reg);
%}
8774
8775 // Note: Long.bitCount(long) returns an int.
// PopCountL with the LoadL folded into the instruction's memory operand; the
// result is an int register operand. Requires UsePopCountInstruction.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    const Register count = $dst$$Register;
    __ popcntq(count, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
8787
8788
8789 //----------MemBar Instructions-----------------------------------------------
8790 // Memory barrier flavors
8791
// Matches both MemBarAcquire and LoadFence. The rule has zero cost, zero size
// and an empty encoding, so no machine code is emitted for these nodes.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8803
// Matches MemBarAcquireLock with an empty, zero-size encoding. Per the format
// text, the CMPXCHG issued earlier by FastLock makes a separate barrier
// unnecessary.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8814
// Matches both MemBarRelease and StoreFence. The rule has zero cost, zero size
// and an empty encoding, so no machine code is emitted for these nodes.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8826
// Matches MemBarReleaseLock with an empty, zero-size encoding. Per the format
// text, the FastUnlock that follows makes a separate barrier unnecessary.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8837
// MemBarStoreLoad: emits a real StoreLoad barrier through the macro assembler.
// The flags register is declared killed, and the format text shows the barrier
// as a locked add to the top of the stack.
instruct membar_storeload(rFlagsReg cr) %{
  match(MemBarStoreLoad);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8852
// MemBarVolatile: emits a StoreLoad barrier through the macro assembler and
// declares the flags register killed. With cost 400 it is the expensive
// alternative to unnecessary_membar_volatile (cost 0, predicate-guarded).
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8867
// MemBarVolatile that Matcher::post_store_load_barrier(n) reports as
// redundant: zero cost, zero size and an empty encoding, so nothing is emitted.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8879
// MemBarFull: encoded with the same StoreLoad barrier call as membar_volatile
// and membar_storeload. The flags register is declared killed.
instruct membar_full(rFlagsReg cr) %{
  match(MemBarFull);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8894
// Matches both MemBarStoreStore and StoreStoreFence. The rule has zero cost,
// zero size and an empty encoding, so no machine code is emitted.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8905
8906 //----------Move Instructions--------------------------------------------------
8907
// CastX2P (long to pointer): a plain register copy that is skipped entirely
// when source and destination were assigned the same register.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    const bool same_register = ($dst$$reg == $src$$reg);
    if (!same_register) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8920
// CastP2X (pointer to long): a plain register copy that is skipped entirely
// when source and destination were assigned the same register.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    const bool same_register = ($dst$$reg == $src$$reg);
    if (!same_register) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8933
8934 // Convert oop into int for vectors alignment masking
// Folds ConvL2I(CastP2X ptr) into a single 32-bit register move.
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{
    const Register low_bits = $dst$$Register;
    const Register pointer = $src$$Register;
    __ movl(low_bits, pointer);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8945
8946 // Convert compressed oop into int for vectors alignment masking
8947 // in case of 32bit oops (heap < 4Gb).
// Folds ConvL2I(CastP2X(DecodeN narrow)) into a single 32-bit register move.
// Only applies when the compressed-oop shift is zero.
instruct convN2I(rRegI dst, rRegN src)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  format %{ "movl $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    const Register low_bits = $dst$$Register;
    const Register narrow_oop = $src$$Register;
    __ movl(low_bits, narrow_oop);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8959
8960 // Convert oop pointer into compressed form
// EncodeP for a pointer whose type is not known to be non-null. The value is
// first copied into dst (when the registers differ) and then encoded in place.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    const Register oop_in = $src$$Register;
    const Register narrow_out = $dst$$Register;
    if (narrow_out != oop_in) {
      __ movq(narrow_out, oop_in);
    }
    __ encode_heap_oop(narrow_out);
  %}
  ins_pipe(ialu_reg_long);
%}
8976
// EncodeP for a pointer whose type is TypePtr::NotNull; uses the two-register
// not-null encoder of the macro assembler.
instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    const Register narrow_out = $dst$$Register;
    const Register oop_in = $src$$Register;
    __ encode_heap_oop_not_null(narrow_out, oop_in);
  %}
  ins_pipe(ialu_reg_long);
%}
8987
// DecodeN for a result type that is neither NotNull nor Constant. The narrow
// value is copied into dst (when the registers differ) and decoded in place.
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    const Register narrow_in = $src$$Register;
    const Register oop_out = $dst$$Register;
    if (oop_out != narrow_in) {
      __ movq(oop_out, narrow_in);
    }
    __ decode_heap_oop(oop_out);
  %}
  ins_pipe(ialu_reg_long);
%}
9004
// DecodeN for a result type that is NotNull or Constant. Picks the in-place
// decoder when source and destination share a register, otherwise the
// two-register form.
instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    const Register narrow_in = $src$$Register;
    const Register oop_out = $dst$$Register;
    if (narrow_in == oop_out) {
      __ decode_heap_oop_not_null(oop_out);
    } else {
      __ decode_heap_oop_not_null(oop_out, narrow_in);
    }
  %}
  ins_pipe(ialu_reg_long);
%}
9022
// EncodePKlass: delegates to the macro assembler's combined move-and-encode
// helper. dst is declared TEMP and the flags register is declared killed.
instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    const Register narrow_klass = $dst$$Register;
    const Register klass = $src$$Register;
    __ encode_and_move_klass_not_null(narrow_klass, klass);
  %}
  ins_pipe(ialu_reg_long);
%}
9032
// DecodeNKlass: delegates to the macro assembler's combined move-and-decode
// helper. dst is declared TEMP and the flags register is declared killed.
instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    const Register klass = $dst$$Register;
    const Register narrow_klass = $src$$Register;
    __ decode_and_move_klass_not_null(klass, narrow_klass);
  %}
  ins_pipe(ialu_reg_long);
%}
9042
9043 //----------Conditional Move---------------------------------------------------
9044 // Jump
9045 // dummy instruction for generating temp registers
// Table jump on a shifted index, Jump(LShiftL switch_val shift). The rule is
// disabled through predicate(false).
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  ins_cost(350);
  predicate(false);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is not usable here: the macro assembler would need
    // r10 for it, and the compiler treats r10 as an allocatable register.
    // Instead the table base is loaded into the temp and the scaled, indexed
    // jump is assembled by hand.
    const Register table_base = $dest$$Register;
    const Register index = $switch_val$$Register;
    const Address::ScaleFactor scale = (Address::ScaleFactor) $shift$$constant;
    __ lea(table_base, $constantaddress);
    __ jmp(Address(table_base, index, scale));
  %}
  ins_pipe(pipe_jmp);
%}
9066
// Table jump on a shifted index plus a 32-bit constant displacement,
// Jump(AddL (LShiftL switch_val shift) offset).
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is not usable here: the macro assembler would need
    // r10 for it, and the compiler treats r10 as an allocatable register.
    // Instead the table base is loaded into the temp and the scaled, indexed
    // jump with displacement is assembled by hand.
    const Register table_base = $dest$$Register;
    const Register index = $switch_val$$Register;
    const Address::ScaleFactor scale = (Address::ScaleFactor) $shift$$constant;
    const int displacement = (int) $offset$$constant;
    __ lea(table_base, $constantaddress);
    __ jmp(Address(table_base, index, scale, displacement));
  %}
  ins_pipe(pipe_jmp);
%}
9086
// Table jump on an unscaled index, Jump(switch_val): the index is added to the
// constant-table base with scale factor times_1.
instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is not usable here: the macro assembler would need
    // r10 for it, and the compiler treats r10 as an allocatable register.
    // Instead the table base is loaded into the temp and the indexed jump is
    // assembled by hand.
    const Register table_base = $dest$$Register;
    const Register index = $switch_val$$Register;
    __ lea(table_base, $constantaddress);
    __ jmp(Address(table_base, index, Address::times_1));
  %}
  ins_pipe(pipe_jmp);
%}
9106
9107 // Conditional move
// CMoveI between two constants on signed-compare flags: src is an immI_1
// operand and the predicate requires the other data input to be the constant 0.
// The value is produced by one setb on the negated condition, no cmov needed.
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition matched = (Assembler::Condition)($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9121
// CMoveI on signed-compare flags, two-operand form used when UseAPX is off:
// dst doubles as the first data input and receives src if the condition holds.
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(cc, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9134
// CMoveI on signed-compare flags, three-operand (ndd) form used when UseAPX is
// on: dst is separate from both data inputs.
instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovl(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9147
// Unsigned-flags counterpart of the constant 1/0 CMoveI: src is an immI_1
// operand and the predicate requires the other data input to be the constant 0.
// The value is produced by one setb on the negated condition.
instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition matched = (Assembler::Condition)($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9161
// CMoveI on unsigned-compare flags, two-operand form used when UseAPX is off.
// This rule is also the expansion target of cmovI_regUCF.
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(cc, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9173
// CMoveI on unsigned-compare flags, three-operand (ndd) form used when UseAPX
// is on.
instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovl(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9185
// rFlagsRegUCF/cmpOpUCF counterpart of the constant 1/0 CMoveI: src is an
// immI_1 operand and the predicate requires the other data input to be the
// constant 0. The value is produced by one setb on the negated condition.
instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition matched = (Assembler::Condition)($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9199
// rFlagsRegUCFE/cmpOpUCFE counterpart of the constant 1/0 CMoveI: src is an
// immI_1 operand and the predicate requires the other data input to be the
// constant 0. The value is produced by one setb on the negated condition.
instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
  ins_encode %{
    const Assembler::Condition matched = (Assembler::Condition)($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9213
// rFlagsRegUCF/cmpOpUCF variant of the int register cmov. It has no encoding
// of its own: the rule expands to cmovI_regU with the same four operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
9222
// CMoveI on rFlagsRegUCFE/cmpOpUCFE flags in three-operand (ndd) form.
instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovl(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9233
// CMoveI for the cmpOpUCF2 'ne' test: src replaces dst when the parity flag is
// set or when the not-equal condition holds, hence two consecutive cmovs.
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9247
9248 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9249 // inputs of the CMove
// CMoveI for the cmpOpUCF2 'eq' test. The data inputs are swapped in the match
// rule (Binary src dst), so the same parity / not-equal cmov pair used for the
// 'ne' test yields the right value. dst is a matched input whose old value the
// cmovs conditionally keep, so it is not declared TEMP; this matches the N, P
// and L variants of this rule.
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9264
9265 // Conditional move
// CMoveI on signed-compare flags with the second data input loaded from
// memory: the LoadI is folded into the cmov's memory operand.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    __ cmovl(cc, target, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9276
9277 // Conditional move
// CMoveI on unsigned-compare flags with the second data input loaded from
// memory. This rule is also the expansion target of cmovI_memUCF.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    __ cmovl(cc, target, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9289
// rFlagsRegUCF/cmpOpUCF variant of the int memory cmov. It has no encoding of
// its own: the rule expands to cmovI_memU with the same four operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
9298
// CMoveI on rFlagsRegUCFE/cmpOpUCFE flags with the second data input loaded
// from memory; encoded directly rather than through an expand rule.
instruct cmovI_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    __ cmovl(cc, target, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9309
9310 // Conditional move
// CMoveN (compressed pointer) on signed-compare flags, two-operand form used
// when UseAPX is off. Encoded with the 32-bit cmov.
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(cc, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9323
9324 // Conditional move ndd
// CMoveN (compressed pointer) on signed-compare flags, three-operand (ndd)
// form used when UseAPX is on.
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovl(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9337
9338 // Conditional move
// CMoveN (compressed pointer) on unsigned-compare flags, two-operand form used
// when UseAPX is off. This rule is also the expansion target of cmovN_regUCF.
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(cc, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9351
// rFlagsRegUCF/cmpOpUCF variant of the compressed-pointer register cmov. It
// has no encoding of its own: the rule expands to cmovN_regU with the same
// four operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
9360
9361 // Conditional move ndd
// CMoveN (compressed pointer) on unsigned-compare flags, three-operand (ndd)
// form used when UseAPX is on.
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovl(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9374
// CMoveN (compressed pointer) on rFlagsRegUCFE/cmpOpUCFE flags in
// three-operand (ndd) form.
instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovl(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9385
// CMoveN for the cmpOpUCF2 'ne' test: src replaces dst when the parity flag is
// set or when the not-equal condition holds, hence two consecutive cmovs.
instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9399
9400 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9401 // inputs of the CMove
// CMoveN for the cmpOpUCF2 'eq' test. The data inputs are swapped in the match
// rule (Binary src dst), so the parity / not-equal cmov pair of the 'ne' rule
// is reused unchanged.
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9415
9416 // Conditional move
// CMoveP on signed-compare flags, two-operand form used when UseAPX is off.
// Encoded with the 64-bit cmov.
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(cc, target, source);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9429
9430 // Conditional move ndd
// CMoveP on signed-compare flags, three-operand (ndd) form used when UseAPX is
// on.
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovq(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9443
9444 // Conditional move
// CMoveP on unsigned-compare flags, two-operand form used when UseAPX is off.
// This rule is also the expansion target of cmovP_regUCF.
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(cc, target, source);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9457
9458 // Conditional move ndd
// CMoveP on unsigned-compare flags, three-operand (ndd) form used when UseAPX
// is on.
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovq(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9471
// rFlagsRegUCF/cmpOpUCF variant of the pointer register cmov. It has no
// encoding of its own: the rule expands to cmovP_regU with the same four
// operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
9480
// CMoveP on rFlagsRegUCFE/cmpOpUCFE flags in three-operand (ndd) form.
instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovq(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9491
// CMoveP for the cmpOpUCF2 'ne' test: src replaces dst when the parity flag is
// set or when the not-equal condition holds, hence two consecutive 64-bit
// cmovs.
instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9505
9506 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9507 // inputs of the CMove
// CMoveP for the cmpOpUCF2 'eq' test. The data inputs are swapped in the match
// rule (Binary src dst), so the parity / not-equal cmov pair of the 'ne' rule
// is reused unchanged.
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9521
// CMoveL between two constants on signed-compare flags: src is an immL1
// operand and the predicate requires the other data input to be the constant 0.
// The value is produced by one setb on the negated condition.
instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition matched = (Assembler::Condition)($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9535
// CMoveL on signed-compare flags, two-operand form used when UseAPX is off.
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(cc, target, source);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9548
// CMoveL on signed-compare flags, three-operand (ndd) form used when UseAPX is
// on.
instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovq(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9561
// CMoveL on signed-compare flags with the second data input loaded from
// memory: the LoadL is folded into the cmov's memory operand.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    __ cmovq(cc, target, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9573
// Unsigned-flags counterpart of the constant 1/0 CMoveL: src is an immL1
// operand and the predicate requires the other data input to be the constant 0.
// The value is produced by one setb on the negated condition.
instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition matched = (Assembler::Condition)($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9587
// CMoveL on unsigned-compare flags, two-operand form used when UseAPX is off.
// This rule is also the expansion target of cmovL_regUCF.
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(cc, target, source);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9600
// CMoveL on unsigned-compare flags, three-operand (ndd) form used when UseAPX
// is on.
instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovq(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9613
// rFlagsRegUCF/cmpOpUCF counterpart of the constant 1/0 CMoveL: src is an
// immL1 operand and the predicate requires the other data input to be the
// constant 0. The value is produced by one setb on the negated condition.
instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition matched = (Assembler::Condition)($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9627
// rFlagsRegUCFE/cmpOpUCFE counterpart of the constant 1/0 CMoveL: src is an
// immL1 operand and the predicate requires the other data input to be the
// constant 0. The value is produced by one setb on the negated condition.
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    const Assembler::Condition matched = (Assembler::Condition)($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9641
// rFlagsRegUCF/cmpOpUCF variant of the long register cmov. It has no encoding
// of its own: the rule expands to cmovL_regU with the same four operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
9650
// CMoveL on rFlagsRegUCFE/cmpOpUCFE flags in three-operand (ndd) form.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Register first = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovq(cc, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9662
// CMoveL for the cmpOpUCF2 'ne' test: src replaces dst when the parity flag is
// set or when the not-equal condition holds, hence two consecutive 64-bit
// cmovs.
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9676
9677 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9678 // inputs of the CMove
// CMoveL for the cmpOpUCF2 'eq' test. The data inputs are swapped in the match
// rule (Binary src dst), so the parity / not-equal cmov pair of the 'ne' rule
// is reused unchanged.
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9692
// CMoveL on cmpOpU / rFlagsRegU whose second value is a LoadL from memory:
// a single cmovq with a memory source, destructive on $dst.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cond = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cond, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9704
// CMoveL with a LoadL source on cmpOpUCF / rFlagsRegUCF.
// Has no encoding of its own: it expands to cmovL_memU with the same operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
9713
// CMoveL with a LoadL source on cmpOpUCFE / rFlagsRegUCFE: a single cmovq
// with a memory source, destructive on $dst.
// The format annotation reads "signed, unsigned" to agree with the other
// UCFE rules (cmovL_imm_01UCFE, cmovL_regUCFE_ndd, cmovF/D_regUCFE); this
// only affects the printed disassembly, not the generated code.
instruct cmovL_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cond = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cond, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9724
// CMoveF on cmpOp / rFlagsReg, destructive on $dst. Implemented as a short
// branch around a register-to-register float move rather than a cmov.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMove condition code with its low bit
    // flipped, so the move is skipped when the CMove condition does not hold.
    const Assembler::Condition skip_cond = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9742
// CMoveF on cmpOpU / rFlagsRegU, destructive on $dst. Implemented as a short
// branch around a register-to-register float move rather than a cmov.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMove condition code with its low bit
    // flipped, so the move is skipped when the CMove condition does not hold.
    const Assembler::Condition skip_cond = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9760
// CMoveF on cmpOpUCF / rFlagsRegUCF.
// Has no encoding of its own: it expands to cmovF_regU with the same operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
9769
// CMoveF on cmpOpUCFE / rFlagsRegUCFE, destructive on $dst. Implemented as a
// short branch around a register-to-register float move rather than a cmov.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMove condition code with its low bit
    // flipped, so the move is skipped when the CMove condition does not hold.
    const Assembler::Condition skip_cond = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9787
// CMoveD on cmpOp / rFlagsReg, destructive on $dst. Implemented as a short
// branch around a register-to-register double move rather than a cmov.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMove condition code with its low bit
    // flipped, so the move is skipped when the CMove condition does not hold.
    const Assembler::Condition skip_cond = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9805
// CMoveD on cmpOpU / rFlagsRegU, destructive on $dst. Implemented as a short
// branch around a register-to-register double move rather than a cmov.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMove condition code with its low bit
    // flipped, so the move is skipped when the CMove condition does not hold.
    const Assembler::Condition skip_cond = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9823
// CMoveD on cmpOpUCF / rFlagsRegUCF.
// Has no encoding of its own: it expands to cmovD_regU with the same operands.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
9832
// CMoveD on cmpOpUCFE / rFlagsRegUCFE, destructive on $dst. Implemented as a
// short branch around a register-to-register double move rather than a cmov.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMove condition code with its low bit
    // flipped, so the move is skipped when the CMove condition does not hold.
    const Assembler::Condition skip_cond = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9850
9851 //----------Arithmetic Instructions--------------------------------------------
9852 //----------Addition Instructions----------------------------------------------
9853
// 32-bit register add, destructive form ($dst = $dst + $src). Selected only
// when UseAPX is off. The flags register is declared killed.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
9866
// 32-bit register add, three-operand form ($dst = $src1 + $src2) emitted via
// eaddl. Selected only when UseAPX is on. Both source operands are tagged
// ndd-demotable in the flag list.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
9880
// 32-bit add of an immediate into a register, destructive form
// ($dst = $dst + $src). Selected only when UseAPX is off.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
9894
// 32-bit add of a register and an immediate, three-operand form
// ($dst = $src1 + $src2) emitted via eaddl. Selected only when UseAPX is on.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
9908
// 32-bit add with the second addend loaded from memory (LoadI folded into the
// add): $dst = $dst + [$src]. No UseAPX predicate on this rule.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9922
// 32-bit read-modify-write add: matches a StoreI of (LoadI $dst + $src) back
// to the same memory operand and emits a single add to memory.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
9936
// 32-bit read-modify-write add of an immediate: matches a StoreI of
// (LoadI $dst + $src) back to the same memory operand and emits a single
// add-immediate to memory.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
9951
// 32-bit increment of a register: AddI of $dst and the immI_1 operand,
// emitted through incrementl. Requires UseIncDec and UseAPX off.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9964
// 32-bit increment, two-register form: AddI of $src and the immI_1 operand
// written to $dst via eincl. Requires both UseAPX and UseIncDec.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
9978
// 32-bit increment of a memory location: matches a StoreI of
// (LoadI $dst + immI_1) back to the same memory operand. Requires UseIncDec.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
9992
// 32-bit decrement of a register, emitted through decrementl. Requires
// UseIncDec and UseAPX off.
// XXX The rule is written against AddI with the immI_M1 operand rather than
// a subtract; the original author questioned why (presumably the ideal graph
// represents "x - 1" as an add of -1 -- confirm).
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10006
// 32-bit decrement, two-register form: AddI of $src and the immI_M1 operand
// written to $dst via edecl. Requires both UseAPX and UseIncDec.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10020
// 32-bit decrement of a memory location: matches a StoreI of
// (LoadI $dst + immI_M1) back to the same memory operand. Requires UseIncDec.
// XXX As with decI_rReg, the rule is written against AddI rather than a
// subtract; the original author questioned why.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10035
// Folds (index << scale) + disp into one leal with no base register.
// Used only when VM_Version::supports_fast_2op_lea(); no flags kill is declared.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift amount is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10048
// Folds (base + index) + disp into one leal with scale factor times_1.
// Used only when VM_Version::supports_fast_3op_lea(); no flags kill is declared.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    __ leal($dst$$Register,
            Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10060
// Folds base + (index << scale) into one leal without displacement.
// Used only when VM_Version::supports_fast_2op_lea(). The base operand class
// excludes rbp and r13 (no_rbp_r13_RegI) -- presumably because those bases
// cannot be encoded without a displacement; confirm against the operand
// definition.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    // The shift amount is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, factor));
  %}
  ins_pipe(ialu_reg_reg);
%}
10073
// Folds (base + (index << scale)) + disp into one fully-populated leal.
// Used only when VM_Version::supports_fast_3op_lea(); no flags kill is declared.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift amount is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10086
// 64-bit register add, destructive form ($dst = $dst + $src). Selected only
// when UseAPX is off. The flags register is declared killed.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10100
// 64-bit register add, three-operand form ($dst = $src1 + $src2) emitted via
// eaddq. Selected only when UseAPX is on. Both source operands are tagged
// ndd-demotable in the flag list.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10114
// 64-bit add of an immL32 immediate into a register, destructive form
// ($dst = $dst + $src). Selected only when UseAPX is off.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10128
// 64-bit add of a register and an immL32 immediate, three-operand form
// ($dst = $src1 + $src2) emitted via eaddq. Selected only when UseAPX is on.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
10142
// 64-bit add with the second addend loaded from memory (LoadL folded into the
// add): $dst = $dst + [$src]. No UseAPX predicate on this rule.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10156
// 64-bit read-modify-write add: matches a StoreL of (LoadL $dst + $src) back
// to the same memory operand and emits a single add to memory.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10170
// 64-bit read-modify-write add of an immL32 immediate: matches a StoreL of
// (LoadL $dst + $src) back to the same memory operand and emits a single
// add-immediate to memory.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10184
// 64-bit increment of a register: AddL of $dst and the immL1 operand,
// emitted through incrementq. Requires UseIncDec and UseAPX off.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10197
// 64-bit increment, two-register form: AddL of $src and the immL1 operand
// written to $dst via eincq. Requires both UseAPX and UseIncDec.
//
// $src is the first input of an AddL and therefore a long value, so it is
// declared rRegL (matching decL_rReg_ndd). Declaring it as rRegI, as this
// rule previously did, gives the operand an int register class that does not
// agree with the long input of the match rule.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10211
// 64-bit increment of a memory location: matches a StoreL of
// (LoadL $dst + immL1) back to the same memory operand. Requires UseIncDec.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10225
// 64-bit decrement of a register, emitted through decrementq. Requires
// UseIncDec and UseAPX off.
// XXX The rule is written against AddL with the immL_M1 operand rather than
// a subtract; the original author questioned why (presumably the ideal graph
// represents "x - 1" as an add of -1 -- confirm).
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10239
// 64-bit decrement, two-register form: AddL of $src and the immL_M1 operand
// written to $dst via edecq. Requires both UseAPX and UseIncDec.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ edecq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10253
// 64-bit decrement of a memory location: matches a StoreL of
// (LoadL $dst + immL_M1) back to the same memory operand. Requires UseIncDec.
// XXX As with decL_rReg, the rule is written against AddL rather than a
// subtract; the original author questioned why.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10268
// Folds (index << scale) + disp into one leaq with no base register.
// Used only when VM_Version::supports_fast_2op_lea(); no flags kill is declared.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift amount is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address(noreg, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10281
// Folds (base + index) + disp into one leaq with scale factor times_1.
// Used only when VM_Version::supports_fast_3op_lea(); no flags kill is declared.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    __ leaq($dst$$Register,
            Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10293
// Folds base + (index << scale) into one leaq without displacement.
// Used only when VM_Version::supports_fast_2op_lea(). The base operand class
// excludes rbp and r13 (no_rbp_r13_RegL) -- presumably because those bases
// cannot be encoded without a displacement; confirm against the operand
// definition.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift amount is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, factor));
  %}
  ins_pipe(ialu_reg_reg);
%}
10306
// Folds (base + (index << scale)) + disp into one fully-populated leaq.
// Used only when VM_Version::supports_fast_3op_lea(); no flags kill is declared.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift amount is used directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10319
// Pointer add: a long register offset is added into a pointer register with
// a 64-bit add, destructive on $dst. The flags register is declared killed.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10332
// Pointer add of an immL32 immediate offset into a pointer register with a
// 64-bit add, destructive on $dst. The flags register is declared killed.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10345
10346 // XXX addP mem ops ????
10347
// CheckCastPP on a pointer register. Declared with size(0) and an empty
// encoding: the node occupies no bytes in the instruction stream.
instruct checkCastPP(rRegP dst) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10357
// CastPP on a pointer register. Declared with size(0) and an empty encoding:
// the node occupies no bytes in the instruction stream.
instruct castPP(rRegP dst) %{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10367
// CastII when VerifyConstraintCasts is 0: zero-size, zero-cost, empty
// encoding. The checked variant castII_checked covers VerifyConstraintCasts > 0.
instruct castII(rRegI dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  size(0);
  ins_cost(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10379
// CastII when VerifyConstraintCasts > 0: emits a call to
// verify_int_in_range with this node's index, its int bottom type and the
// value register. The flags register is declared killed.
instruct castII_checked(rRegI dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    const TypeInt* range = bottom_type()->is_int();
    __ verify_int_in_range(_idx, range, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10392
// CastLL when VerifyConstraintCasts is 0: zero-size, zero-cost, empty
// encoding. The checked variants cover VerifyConstraintCasts > 0.
instruct castLL(rRegL dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  size(0);
  ins_cost(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10404
// CastLL when VerifyConstraintCasts > 0 and castLL_is_imm32(n) holds: emits a
// call to verify_long_in_range with noreg as the scratch register, so no
// TEMP operand is needed. The flags register is declared killed.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    const TypeLong* range = bottom_type()->is_long();
    __ verify_long_in_range(_idx, range, $dst$$Register, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10417
// CastLL when VerifyConstraintCasts > 0 and castLL_is_imm32(n) does not hold:
// emits a call to verify_long_in_range and hands it the TEMP register $tmp.
// The flags register is declared killed.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    const TypeLong* range = bottom_type()->is_long();
    __ verify_long_in_range(_idx, range, $dst$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10430
// CastFF on a float register: zero-size, zero-cost, empty encoding.
instruct castFF(regF dst) %{
  match(Set dst (CastFF dst));

  size(0);
  ins_cost(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10441
// CastHH: zero-size, zero-cost, empty encoding. The operand is declared with
// the regF register class.
instruct castHH(regF dst) %{
  match(Set dst (CastHH dst));

  size(0);
  ins_cost(0);
  format %{ "# castHH of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10452
// CastDD on a double register: zero-size, zero-cost, empty encoding.
instruct castDD(regD dst) %{
  match(Set dst (CastDD dst));

  size(0);
  ins_cost(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10463
// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
//
// Pointer compare-and-swap (strong and weak share one encoding) for nodes
// whose barrier_data() is 0. Emits lock + cmpxchgq with the expected value
// pinned to rax ($oldval, declared killed), then materializes the success
// bit in $res with setcc on 'equal'.
instruct compareAndSwapP(rRegI res, memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    // Turn the comparison outcome into the boolean result.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10485
// Long compare-and-swap (strong and weak share one encoding). Emits lock +
// cmpxchgq with the expected value pinned to rax ($oldval, declared killed),
// then materializes the success bit in $res with setcc on 'equal'.
instruct compareAndSwapL(rRegI res, memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    // Turn the comparison outcome into the boolean result.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10505
// Int compare-and-swap (strong and weak share one encoding). Emits lock +
// cmpxchgl with the expected value pinned to rax ($oldval, declared killed),
// then materializes the success bit in $res with setcc on 'equal'.
instruct compareAndSwapI(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    // Turn the comparison outcome into the boolean result.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10525
// Byte compare-and-swap (strong and weak share one encoding). Emits lock +
// cmpxchgb with the expected value pinned to rax ($oldval, declared killed),
// then materializes the success bit in $res with setcc on 'equal'.
instruct compareAndSwapB(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
    // Turn the comparison outcome into the boolean result.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10545
// Short compare-and-swap (strong and weak share one encoding). Emits lock +
// cmpxchgw with the expected value pinned to rax ($oldval, declared killed),
// then materializes the success bit in $res with setcc on 'equal'.
instruct compareAndSwapS(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
    // Turn the comparison outcome into the boolean result.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10565
// Narrow-oop compare-and-swap (strong and weak share one encoding) for nodes
// whose barrier_data() is 0. Emits lock + cmpxchgl with the expected value
// pinned to rax ($oldval, declared killed), then materializes the success
// bit in $res with setcc on 'equal'.
instruct compareAndSwapN(rRegI res, memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    // Turn the comparison outcome into the boolean result.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10585
// Byte compare-and-exchange. $oldval (pinned to rax) is both the expected
// value on input and the result of the node; the encoding is lock + cmpxchgb
// with no result materialization afterwards.
instruct compareAndExchangeB(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10602
// Short compare-and-exchange. $oldval (pinned to rax) is both the expected
// value on input and the result of the node; the encoding is lock + cmpxchgw
// with no result materialization afterwards.
instruct compareAndExchangeS(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10619
// Int compare-and-exchange. $oldval (pinned to rax) is both the expected
// value on input and the result of the node; the encoding is lock + cmpxchgl
// with no result materialization afterwards.
instruct compareAndExchangeI(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10636
// Long compare-and-exchange. $oldval (pinned to rax) is both the expected
// value on input and the result of the node; the encoding is lock + cmpxchgq
// with no result materialization afterwards.
instruct compareAndExchangeL(memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10653
// Narrow-oop compare-and-exchange for nodes whose barrier_data() is 0.
// $oldval (pinned to rax) is both the expected value on input and the result
// of the node; the encoding is lock + cmpxchgl.
instruct compareAndExchangeN(memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10670
// Pointer compare-and-exchange for nodes whose barrier_data() is 0.
// $oldval (pinned to rax) is both the expected value on input and the result
// of the node; the encoding is lock + cmpxchgq.
instruct compareAndExchangeP(memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10688
// GetAndAddB with a register addend whose result is not used
// (result_not_used()): emits a lock-prefixed addb to memory instead of xaddb.
// The rule's result is the Universe-typed placeholder operand `dummy`.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10700
// GetAndAddB with an immediate (immI) addend whose result is not used
// (result_not_used()): emits a lock-prefixed addb of the constant to memory.
// The rule's result is the Universe-typed placeholder operand `dummy`.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10712
// GetAndAddB whose result is used (!result_not_used()).
// newval is both the addend and the result register of the rule.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
    // Normalize the byte result held in the int register; the helper is
    // defined elsewhere (presumably extends per T_BYTE -- confirm).
    __ narrow_subword_type($newval$$Register, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10725
// GetAndAddS with a register addend whose result is not used
// (result_not_used()): emits a lock-prefixed addw to memory instead of xaddw.
// The rule's result is the Universe-typed placeholder operand `dummy`.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10737
// GetAndAddS with an immediate (immI) addend whose result is not used.
// Unlike the byte/int/long immediate variants, this rule is additionally gated
// on the UseStoreImmI16 flag. Emits a lock-prefixed addw of the constant.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10749
// GetAndAddS whose result is used (!result_not_used()).
// newval is both the addend and the result register of the rule.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
    // Normalize the 16-bit result held in the int register; the helper is
    // defined elsewhere (presumably extends per T_SHORT -- confirm).
    __ narrow_subword_type($newval$$Register, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10762
// GetAndAddI with a register addend whose result is not used
// (result_not_used()): emits a lock-prefixed addl to memory instead of xaddl.
// The rule's result is the Universe-typed placeholder operand `dummy`.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10774
// GetAndAddI with an immediate (immI) addend whose result is not used
// (result_not_used()): emits a lock-prefixed addl of the constant to memory.
// The rule's result is the Universe-typed placeholder operand `dummy`.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10786
// GetAndAddI whose result is used (!result_not_used()).
// newval is both the addend and the result register of the rule; no
// narrowing step follows the xaddl, unlike the byte/short rules.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "xaddl_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10798
// GetAndAddL with a register addend whose result is not used
// (result_not_used()): emits a lock-prefixed addq to memory instead of xaddq.
// The rule's result is the Universe-typed placeholder operand `dummy`.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10810
// GetAndAddL with an immL32 addend whose result is not used
// (result_not_used()): emits a lock-prefixed addq of the constant to memory.
// The rule's result is the Universe-typed placeholder operand `dummy`.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10822
// GetAndAddL whose result is used (!result_not_used()).
// newval is both the addend and the result register of the rule.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);
  format %{ "xaddq_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddq($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10834
// GetAndSetB: exchanges newval with the byte at mem; newval receives the
// result. No lock() call and no flags effect are declared for this rule.
// NOTE(review): x86 XCHG with a memory operand is architecturally locked
// (Intel SDM), which is presumably why no explicit prefix is emitted.
instruct xchgB( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
    // Normalize the byte result held in the int register; the helper is
    // defined elsewhere (presumably extends per T_BYTE -- confirm).
    __ narrow_subword_type($newval$$Register, T_BYTE);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10844
// GetAndSetS: exchanges newval with the 16-bit value at mem; newval receives
// the result. No lock() call and no flags effect are declared for this rule.
// NOTE(review): x86 XCHG with a memory operand is architecturally locked
// (Intel SDM), which is presumably why no explicit prefix is emitted.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
    // Normalize the 16-bit result held in the int register; the helper is
    // defined elsewhere (presumably extends per T_SHORT -- confirm).
    __ narrow_subword_type($newval$$Register, T_SHORT);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10854
// GetAndSetI: exchanges newval with the int at mem; newval receives the
// result. No lock() call and no flags effect are declared for this rule.
// NOTE(review): x86 XCHG with a memory operand is architecturally locked
// (Intel SDM), which is presumably why no explicit prefix is emitted.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10863
// GetAndSetL: exchanges newval with the long at mem; newval receives the
// result. No lock() call and no flags effect are declared for this rule.
// NOTE(review): x86 XCHG with a memory operand is architecturally locked
// (Intel SDM), which is presumably why no explicit prefix is emitted.
instruct xchgL( memory mem, rRegL newval) %{
  match(Set newval (GetAndSetL mem newval));
  // The disassembly text names the 64-bit form, matching the xchgq that is
  // actually encoded below (it previously printed "XCHGL").
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10872
// GetAndSetP: exchanges newval with the pointer at mem; newval receives the
// result. Only selected when the LoadStore node carries no barrier data
// (barrier_data() == 0). No lock() call and no flags effect are declared.
// NOTE(review): x86 XCHG with a memory operand is architecturally locked
// (Intel SDM), which is presumably why no explicit prefix is emitted.
instruct xchgP( memory mem, rRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10882
// GetAndSetN: exchanges newval with the narrow oop at mem using the 32-bit
// xchgl; newval receives the result. Only selected when the LoadStore node
// carries no barrier data (barrier_data() == 0). No lock() call and no flags
// effect are declared.
// NOTE(review): x86 XCHG with a memory operand is architecturally locked
// (Intel SDM), which is presumably why no explicit prefix is emitted.
instruct xchgN( memory mem, rRegN newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetN mem newval));
  // Memory operand is bracketed like the other xchg rules (the opening '['
  // was previously missing from this format string).
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10892
10893 //----------Abs Instructions-------------------------------------------
10894
// Integer Absolute Instructions
// AbsI: computes dst = 0 - src, then conditionally moves src into dst on the
// "less" condition left by that subtraction. dst is zeroed before src is read
// and is declared TEMP (presumably so the allocator keeps it distinct from
// src -- confirm). Flags are killed.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl $dst, $dst\t# abs int\n\t"
            "subl $dst, $src\n\t"
            "cmovll $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);   // dst = 0
    __ subl($dst$$Register, $src$$Register);   // dst = 0 - src, sets flags
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register); // keep src if (0 - src) < 0
  %}

  ins_pipe(ialu_reg_reg);
%}
10911
// Long Absolute Instructions
// AbsL: computes dst = 0 - src with a 64-bit subtract, then conditionally
// moves src into dst on the "less" condition left by that subtraction.
// dst is zeroed before src is read and is declared TEMP (presumably so the
// allocator keeps it distinct from src -- confirm). Flags are killed.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl $dst, $dst\t# abs long\n\t"
            "subq $dst, $src\n\t"
            "cmovlq $dst, $src" %}
  ins_encode %{
    // The 32-bit xorl is used to clear dst; on x86-64 a 32-bit register write
    // zero-extends into the upper half, so the whole register becomes 0.
    __ xorl($dst$$Register, $dst$$Register);
    __ subq($dst$$Register, $src$$Register);   // dst = 0 - src, sets flags
    __ cmovq(Assembler::less, $dst$$Register, $src$$Register); // keep src if (0 - src) < 0
  %}

  ins_pipe(ialu_reg_reg);
%}
10928
10929 //----------Subtraction Instructions-------------------------------------------
10930
10931 // Integer Subtraction Instructions
// SubI, register - register, two-operand form (dst = dst - src).
// Used only when UseAPX is off. Kills the flags register and records the
// PD::Flag_sets_* markers for overflow, sign, zero, carry and parity.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10945
// SubI, register - register, three-operand APX form (dst = src1 - src2).
// Used only when UseAPX is on. In addition to the PD::Flag_sets_* markers the
// rule is tagged PD::Flag_ndd_demotable_opr1 (presumably allowing demotion to
// the two-operand encoding when dst matches src1 -- confirm).
instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10959
// SubI, register - immediate, three-operand APX form (dst = src1 - src2 where
// src2 is an immI constant). Used only when UseAPX is on. Tagged with the
// PD::Flag_sets_* markers and PD::Flag_ndd_demotable_opr1.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10973
// SubI, register - memory: folds the LoadI of src into the subtract
// (dst = dst - [src]). No UseAPX predicate. Cost 150.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10987
// SubI, memory -= register: matches the load / subtract / store-back pattern
// on the same memory operand and emits a single subl to memory. Cost 150.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11001
// SubL, register - register, two-operand form (dst = dst - src).
// Used only when UseAPX is off. Kills the flags register and records the
// PD::Flag_sets_* markers for overflow, sign, zero, carry and parity.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11015
// SubL, register - register, three-operand APX form (dst = src1 - src2).
// Used only when UseAPX is on. Tagged with the PD::Flag_sets_* markers and
// PD::Flag_ndd_demotable_opr1.
instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11029
// SubL, register - immediate, three-operand APX form (dst = src1 - src2 where
// src2 is an immL32 constant). Used only when UseAPX is on. Tagged with the
// PD::Flag_sets_* markers and PD::Flag_ndd_demotable_opr1.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11043
// SubL, register - memory: folds the LoadL of src into the subtract
// (dst = dst - [src]). No UseAPX predicate. Cost 150.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11057
// SubL, memory -= register: matches the load / subtract / store-back pattern
// on the same memory operand and emits a single subq to memory. Cost 150.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11071
// Subtract from a pointer
// Matches AddP of dst with a negated int (SubI zero src) and emits a single
// subq dst, src.
// XXX hmpf??? (original author's open question, kept as-is)
// NOTE(review): src is declared rRegI but is consumed by the 64-bit subq;
// whether the upper 32 bits of src are well-defined at this point cannot be
// determined from this rule alone -- confirm before relying on it.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq $dst, $src\t# ptr - int" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11085
// Integer negate expressed as (SubI 0 dst), in place. Used only when UseAPX
// is off. Records the overflow, sign, zero and parity PD::Flag_sets_* markers
// (the carry marker is not listed, unlike the sub rules).
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11099
// Integer negate expressed as (SubI 0 src), APX two-register form
// (dst = -src). Used only when UseAPX is on. src is the second input of the
// matched SubI, and the rule is tagged PD::Flag_ndd_demotable_opr2.
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11113
// Integer negate expressed as the NegI ideal node, in place. Used only when
// UseAPX is off. Emits the same negl as negI_rReg, which instead matches the
// (SubI 0 dst) shape.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11127
// Integer negate expressed as the NegI ideal node, APX two-register form
// (dst = -src). Used only when UseAPX is on. src is the only input of the
// matched NegI, and the rule is tagged PD::Flag_ndd_demotable_opr1.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11141
// Integer negate in memory: matches load / (SubI 0 value) / store-back on the
// same memory operand and emits a single negl to memory.
// NOTE(review): the pipe class is ialu_reg although the operand is memory;
// the sibling memory-destination sub rules use ialu_mem_reg -- confirm intent.
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11154
// Long negate expressed as (SubL 0 dst), in place. Used only when UseAPX is
// off. Records the overflow, sign, zero and parity PD::Flag_sets_* markers
// (the carry marker is not listed, unlike the sub rules).
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11168
// Long negate expressed as (SubL 0 src), APX two-register form (dst = -src).
// Used only when UseAPX is on. src is the second input of the matched SubL,
// and the rule is tagged PD::Flag_ndd_demotable_opr2.
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11182
// Long negate expressed as the NegL ideal node, in place. Used only when
// UseAPX is off. Emits the same negq as negL_rReg, which instead matches the
// (SubL 0 dst) shape. Records the overflow, sign, zero and parity
// PD::Flag_sets_* markers and kills the flags register.
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  // Operand is an rRegL negated with negq, so the disassembly annotation is
  // "long" (it previously read "int"), matching negL_rReg.
  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11196
// Long negate expressed as the NegL ideal node, APX two-register form
// (dst = -src). Used only when UseAPX is on. src is the only input of the
// matched NegL, and the rule is tagged PD::Flag_ndd_demotable_opr1.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11210
// Long negate in memory: matches load / (SubL 0 value) / store-back on the
// same memory operand and emits a single negq to memory.
// NOTE(review): the pipe class is ialu_reg although the operand is memory;
// the sibling memory-destination sub rules use ialu_mem_reg -- confirm intent.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11223
11224 //----------Multiplication/Division Instructions-------------------------------
11225 // Integer Multiplication Instructions
11226 // Multiply Register
11227
// MulI, register * register, two-operand form (dst = dst * src).
// Used only when UseAPX is off. Cost 300; flags are killed.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11241
// MulI, register * register, three-operand APX form (dst = src1 * src2).
// Used only when UseAPX is on. Tagged demotable on both operand 1 and
// operand 2 (PD::Flag_ndd_demotable_opr1/opr2). Cost 300.
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11256
// MulI, register * immediate, three-operand imull (dst = src * imm).
// No UseAPX predicate: dst and src are separate operands here. Cost 300.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11269
// MulI, register * memory: folds the LoadI of src into the multiply
// (dst = dst * [src]). Cost 350.
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11282
// MulI, memory * immediate: folds the LoadI of src into a three-operand imull
// (dst = [src] * imm). Cost 300.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11295
// MulAddS2I: dst = dst * src1 + src2 * src3, built by expanding into existing
// rules rather than a dedicated encoding. src2 is overwritten by the second
// multiply, hence KILL src2.
// NOTE(review): the expansion names mulI_rReg, which carries
// predicate(!UseAPX); how ADLC treats predicates of rules used inside
// expand is not visible here -- confirm behaviour under UseAPX.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_rReg(dst, src1, cr);
            mulI_rReg(src2, src3, cr);
            addI_rReg(dst, src2, cr); %}
%}
11305
// MulL, register * register, two-operand form (dst = dst * src).
// Used only when UseAPX is off. Cost 300; flags are killed.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11319
// MulL, register * register, three-operand APX form (dst = src1 * src2).
// Used only when UseAPX is on. Tagged demotable on both operand 1 and
// operand 2 (PD::Flag_ndd_demotable_opr1/opr2). Cost 300.
instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11334
// MulL, register * immL32 immediate, three-operand imulq (dst = src * imm).
// No UseAPX predicate: dst and src are separate operands here. Cost 300.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11347
// MulL, register * memory: folds the LoadL of src into the multiply
// (dst = dst * [src]). Cost 350.
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11360
11361
// MulL, memory * immL32 immediate: folds the LoadL of src into a
// three-operand imulq (dst = [src] * imm). Cost 300.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11374
// MulHiL (signed high half of a long multiply). The result operand is
// restricted to rdx_RegL; the second input is restricted to rax_RegL and is
// both used and destroyed (USE_KILL). Per the format text the one-operand
// imulq produces RDX:RAX = RAX * src.
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11387
// UMulHiL (unsigned high half of a long multiply). The result operand is
// restricted to rdx_RegL; the second input is restricted to rax_RegL and is
// both used and destroyed (USE_KILL). Per the format text the one-operand
// mulq produces RDX:RAX = RAX * src.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11400
// Signed int division: the dividend and the result use the rax_RegI operand,
// rdx is killed, and the divisor is restricted to a class excluding rax/rdx
// (no_rax_rdx_RegI). The code is produced by the cdql_enc encoding class
// (defined elsewhere); the format text below describes the sequence,
// including the special-case path for 0x80000000 / -1 that skips idivl.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
    "normal: cdql\n\t"
            "idivl $div\n"
    "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11419
// Signed long division: the dividend and the result use the rax_RegL operand,
// rdx is killed, and the divisor is restricted to a class excluding rax/rdx
// (no_rax_rdx_RegL). The code is produced by the cdqq_enc encoding class
// (defined elsewhere); the format text below describes the sequence,
// including the special-case path for 0x8000000000000000 / -1 that skips
// idivq.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
    "normal: cdqq\n\t"
            "idivq $div\n"
    "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11439
// Unsigned int division (UDivI): the dividend and the result use the rax_RegI
// operand, rdx is killed, and the divisor excludes rax/rdx. The instruction
// sequence comes from the macro-assembler helper udivI (defined elsewhere),
// which is handed rax, the divisor and rdx.
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11452
// Unsigned long division (UDivL): the dividend and the result use the
// rax_RegL operand, rdx is killed, and the divisor excludes rax/rdx. The
// instruction sequence comes from the macro-assembler helper udivL (defined
// elsewhere), which is handed rax, the divisor and rdx.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
    __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11465
// Integer DIVMOD with Register, both quotient and mod results
// Matches the DivModI node directly (no Set): rax and rdx are both operands
// and, unlike divI_rReg / modI_rReg, neither is declared killed. Uses the same
// cdql_enc encoding class (defined elsewhere) and the same sequence text.
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
    "normal: cdql\n\t"
            "idivl $div\n"
    "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}
11485
// Long DIVMOD with Register, both quotient and mod results
// Matches the DivModL node directly (no Set): rax and rdx are both operands
// and, unlike divL_rReg / modL_rReg, neither is declared killed. Uses the same
// cdqq_enc encoding class (defined elsewhere) and the same sequence text.
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
    "normal: cdqq\n\t"
            "idivq $div\n"
    "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}
11506
// Unsigned integer DIVMOD with Register, both quotient and mod results
// Matches UDivModI directly (no Set). In addition to rax, rdx and the divisor,
// a scratch register outside rax/rdx is reserved (TEMP tmp) and passed to the
// macro-assembler helper udivmodI (defined elsewhere).
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
         %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11523
// Unsigned long DIVMOD with Register, both quotient and mod results
// Matches UDivModL directly (no Set). In addition to rax, rdx and the divisor,
// a scratch register outside rax/rdx is reserved (TEMP tmp) and passed to the
// macro-assembler helper udivmodL (defined elsewhere).
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
         %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11540
// Signed int remainder (ModI): the dividend uses the rax_RegI operand and is
// killed; the result is the rdx_RegI operand. Shares the cdql_enc encoding
// class (defined elsewhere) with divI_rReg; only the result register and the
// kill set differ. Note the cost here is 300, whereas divI_rReg uses
// 30*100+10*100.
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "cmpl rax, 0x80000000\t# irem\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
    "normal: cdql\n\t"
            "idivl $div\n"
    "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11559
// Signed long remainder (ModL): the dividend uses the rax_RegL operand and is
// killed; the result is the rdx_RegL operand. Shares the cdqq_enc encoding
// class (defined elsewhere) with divL_rReg; only the result register and the
// kill set differ. Note the cost here is 300, whereas divL_rReg uses
// 30*100+10*100.
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
    "normal: cdqq\n\t"
            "idivq $div\n"
    "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11579
// Unsigned int remainder (UModI): the dividend uses the rax_RegI operand and
// is killed; the result is the rdx_RegI operand. The instruction sequence
// comes from the macro-assembler helper umodI (defined elsewhere).
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11592
// Unsigned long remainder (UModL): the dividend uses the rax_RegL operand and
// is killed; the result is the rdx_RegL operand. The instruction sequence
// comes from the macro-assembler helper umodL (defined elsewhere).
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11605
11606 // Integer Shift Instructions
// Shift Left by one, two, three
// LShiftI of dst in place by an immI2 constant, two-operand sall.
// Used only when UseAPX is off. Flags are killed.
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11620
// Shift Left by one, two, three
// LShiftI by an immI2 constant, APX three-operand form (dst = src << shift).
// Used only when UseAPX is on; tagged PD::Flag_ndd_demotable_opr1.
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11635
// Shift Left by 8-bit immediate
// LShiftI of dst in place by an immI8 constant, two-operand sall.
// Used only when UseAPX is off. Flags are killed.
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11649
// Shift Left by 8-bit immediate
// LShiftI by an immI8 constant, APX three-operand form (dst = src << shift).
// Used only when UseAPX is on; tagged PD::Flag_ndd_demotable_opr1.
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    // Trailing `false` is an assembler option defined elsewhere
    // (presumably the no-flags selector -- confirm).
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11664
// Shift Left by 8-bit immediate
// Memory form: matches load / LShiftI by an immI8 constant / store-back on the
// same memory operand and emits a single sall to memory. No UseAPX predicate.
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11677
// Shift Left by variable
// LShiftI of dst in place by a register count. Used only when BMI2 is not
// supported. The count operand is restricted to the rcx_RegI class and is not
// passed to the assembler: the one-operand sall form takes only dst.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11691
// Shift Left by variable
// Memory form: matches load / LShiftI by a register count / store-back on the
// same memory operand. Used only when BMI2 is not supported. The count operand
// is restricted to the rcx_RegI class and is not passed to the assembler: the
// one-operand sall form takes only the address.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
11705
// Shift Left by variable count, BMI2 form: three register operands (dst, src, shift) with the
// count in any rRegI. No flags operand is declared, so nothing is marked as killed.
11706 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11707 %{
11708 predicate(VM_Version::supports_bmi2());
11709 match(Set dst (LShiftI src shift));
11710
11711 format %{ "shlxl $dst, $src, $shift" %}
11712 ins_encode %{
11713 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11714 %}
11715 ins_pipe(ialu_reg_reg);
11716 %}
11717
// Shift Left by variable count, BMI2 form with the shifted value loaded from memory:
// folds the LoadI into the shift. Carries an explicit ins_cost of 175.
11718 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11719 %{
11720 predicate(VM_Version::supports_bmi2());
11721 match(Set dst (LShiftI (LoadI src) shift));
11722 ins_cost(175);
11723 format %{ "shlxl $dst, $src, $shift" %}
11724 ins_encode %{
11725 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11726 %}
11727 ins_pipe(ialu_reg_mem);
11728 %}
11729
11730 // Arithmetic Shift Right by 8-bit immediate: two-operand int form, dst is both the shifted input and the result.
// Selected only when UseAPX is off; sarI_rReg_imm_ndd is the UseAPX counterpart.
11731 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11732 %{
11733 predicate(!UseAPX);
11734 match(Set dst (RShiftI dst shift));
11735 effect(KILL cr); // the flags operand is declared clobbered
11736
11737 format %{ "sarl $dst, $shift" %}
11738 ins_encode %{
11739 __ sarl($dst$$Register, $shift$$constant);
11740 %}
11741 ins_pipe(ialu_reg); // register-only operation: same pipe class as salI_rReg_imm and shrI_rReg_imm
11742 %}
11743
11744 // Arithmetic Shift Right by 8-bit immediate, NDD (new data destination) int form: dst and src are separate operands.
// Selected only when UseAPX is on.
11745 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11746 %{
11747 predicate(UseAPX);
11748 match(Set dst (RShiftI src shift));
11749 effect(KILL cr); // the flags operand is declared clobbered
11750 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
11751
11752 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11753 ins_encode %{
11754 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
11755 %}
11756 ins_pipe(ialu_reg); // register-only operation: same pipe class as salI_rReg_imm_ndd and shrI_rReg_imm_ndd
11757 %}
11758
11759 // Arithmetic Shift Right by 8-bit immediate, read-modify-write on memory: matches a StoreI of
// (RShiftI (LoadI dst) shift) back to the same memory operand. No predicate is attached.
11760 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11761 %{
11762 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11763 effect(KILL cr); // the flags operand is declared clobbered
11764
11765 format %{ "sarl $dst, $shift" %}
11766 ins_encode %{
11767 __ sarl($dst$$Address, $shift$$constant);
11768 %}
11769 ins_pipe(ialu_mem_imm);
11770 %}
11771
11772 // Arithmetic Shift Right by variable count: two-operand int form used when BMI2 is not available.
// The count operand is restricted to the rcx_RegI class; the encoding names only dst,
// so the count register is implicit (CL, per the instruct name).
11773 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11774 %{
11775 predicate(!VM_Version::supports_bmi2());
11776 match(Set dst (RShiftI dst shift));
11777 effect(KILL cr); // the flags operand is declared clobbered
11778
11779 format %{ "sarl $dst, $shift" %}
11780 ins_encode %{
11781 __ sarl($dst$$Register);
11782 %}
11783 ins_pipe(ialu_reg_reg);
11784 %}
11785
11786 // Arithmetic Shift Right by variable count, read-modify-write on memory, used when BMI2 is not available.
// Matches a StoreI of (RShiftI (LoadI dst) shift) back to the same memory operand; the count
// operand is restricted to rcx_RegI and is implicit in the encoding.
11787 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11788 %{
11789 predicate(!VM_Version::supports_bmi2());
11790 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11791 effect(KILL cr); // the flags operand is declared clobbered
11792
11793 format %{ "sarl $dst, $shift" %}
11794 ins_encode %{
11795 __ sarl($dst$$Address);
11796 %}
11797 ins_pipe(ialu_mem_reg);
11798 %}
11799
// Arithmetic Shift Right by variable count, BMI2 form: three register operands (dst, src, shift)
// with the count in any rRegI. No flags operand is declared, so nothing is marked as killed.
11800 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11801 %{
11802 predicate(VM_Version::supports_bmi2());
11803 match(Set dst (RShiftI src shift));
11804
11805 format %{ "sarxl $dst, $src, $shift" %}
11806 ins_encode %{
11807 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11808 %}
11809 ins_pipe(ialu_reg_reg);
11810 %}
11811
// Arithmetic Shift Right by variable count, BMI2 form with the shifted value loaded from memory:
// folds the LoadI into the shift. Carries an explicit ins_cost of 175.
11812 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11813 %{
11814 predicate(VM_Version::supports_bmi2());
11815 match(Set dst (RShiftI (LoadI src) shift));
11816 ins_cost(175);
11817 format %{ "sarxl $dst, $src, $shift" %}
11818 ins_encode %{
11819 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11820 %}
11821 ins_pipe(ialu_reg_mem);
11822 %}
11823
11824 // Logical Shift Right by 8-bit immediate: two-operand int form, dst is both the shifted input and the result.
// Selected only when UseAPX is off; shrI_rReg_imm_ndd is the UseAPX counterpart.
11825 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11826 %{
11827 predicate(!UseAPX);
11828 match(Set dst (URShiftI dst shift));
11829 effect(KILL cr); // the flags operand is declared clobbered
11830
11831 format %{ "shrl $dst, $shift" %}
11832 ins_encode %{
11833 __ shrl($dst$$Register, $shift$$constant);
11834 %}
11835 ins_pipe(ialu_reg);
11836 %}
11837
11838 // Logical Shift Right by 8-bit immediate, NDD (new data destination) int form: dst and src are separate operands.
// Selected only when UseAPX is on.
11839 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11840 %{
11841 predicate(UseAPX);
11842 match(Set dst (URShiftI src shift));
11843 effect(KILL cr); // the flags operand is declared clobbered
11844 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
11845
11846 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
11847 ins_encode %{
11848 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
11849 %}
11850 ins_pipe(ialu_reg);
11851 %}
11852
11853 // Logical Shift Right by 8-bit immediate, read-modify-write on memory: matches a StoreI of
// (URShiftI (LoadI dst) shift) back to the same memory operand. No predicate is attached.
11854 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11855 %{
11856 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
11857 effect(KILL cr); // the flags operand is declared clobbered
11858
11859 format %{ "shrl $dst, $shift" %}
11860 ins_encode %{
11861 __ shrl($dst$$Address, $shift$$constant);
11862 %}
11863 ins_pipe(ialu_mem_imm);
11864 %}
11865
11866 // Logical Shift Right by variable count: two-operand int form used when BMI2 is not available.
// The count operand is restricted to the rcx_RegI class; the encoding names only dst,
// so the count register is implicit (CL, per the instruct name).
11867 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11868 %{
11869 predicate(!VM_Version::supports_bmi2());
11870 match(Set dst (URShiftI dst shift));
11871 effect(KILL cr); // the flags operand is declared clobbered
11872
11873 format %{ "shrl $dst, $shift" %}
11874 ins_encode %{
11875 __ shrl($dst$$Register);
11876 %}
11877 ins_pipe(ialu_reg_reg);
11878 %}
11879
11880 // Logical Shift Right by variable count, read-modify-write on memory, used when BMI2 is not available.
// Matches a StoreI of (URShiftI (LoadI dst) shift) back to the same memory operand; the count
// operand is restricted to rcx_RegI and is implicit in the encoding.
11881 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11882 %{
11883 predicate(!VM_Version::supports_bmi2());
11884 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
11885 effect(KILL cr); // the flags operand is declared clobbered
11886
11887 format %{ "shrl $dst, $shift" %}
11888 ins_encode %{
11889 __ shrl($dst$$Address);
11890 %}
11891 ins_pipe(ialu_mem_reg);
11892 %}
11893
// Logical Shift Right by variable count, BMI2 form: three register operands (dst, src, shift)
// with the count in any rRegI. No flags operand is declared, so nothing is marked as killed.
11894 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11895 %{
11896 predicate(VM_Version::supports_bmi2());
11897 match(Set dst (URShiftI src shift));
11898
11899 format %{ "shrxl $dst, $src, $shift" %}
11900 ins_encode %{
11901 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
11902 %}
11903 ins_pipe(ialu_reg_reg);
11904 %}
11905
// Logical Shift Right by variable count, BMI2 form with the shifted value loaded from memory:
// folds the LoadI into the shift. Carries an explicit ins_cost of 175.
11906 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
11907 %{
11908 predicate(VM_Version::supports_bmi2());
11909 match(Set dst (URShiftI (LoadI src) shift));
11910 ins_cost(175);
11911 format %{ "shrxl $dst, $src, $shift" %}
11912 ins_encode %{
11913 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
11914 %}
11915 ins_pipe(ialu_reg_mem);
11916 %}
11917
11918 // Long Shift Instructions
11919 // Shift Left by one, two, three (immI2 count): two-operand long form, dst is both input and result.
// Selected only when UseAPX is off; salL_rReg_immI2_ndd is the UseAPX counterpart.
11920 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
11921 %{
11922 predicate(!UseAPX);
11923 match(Set dst (LShiftL dst shift));
11924 effect(KILL cr); // the flags operand is declared clobbered
11925
11926 format %{ "salq $dst, $shift" %}
11927 ins_encode %{
11928 __ salq($dst$$Register, $shift$$constant);
11929 %}
11930 ins_pipe(ialu_reg);
11931 %}
11932
11933 // Shift Left by one, two, three (immI2 count), NDD (new data destination) long form: dst and src are separate operands.
// Selected only when UseAPX is on.
11934 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
11935 %{
11936 predicate(UseAPX);
11937 match(Set dst (LShiftL src shift));
11938 effect(KILL cr); // the flags operand is declared clobbered
11939 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
11940
11941 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
11942 ins_encode %{
11943 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
11944 %}
11945 ins_pipe(ialu_reg);
11946 %}
11947
11948 // Shift Left by 8-bit immediate: two-operand long form, dst is both the shifted input and the result.
// Selected only when UseAPX is off; salL_rReg_imm_ndd is the UseAPX counterpart.
11949 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
11950 %{
11951 predicate(!UseAPX);
11952 match(Set dst (LShiftL dst shift));
11953 effect(KILL cr); // the flags operand is declared clobbered
11954
11955 format %{ "salq $dst, $shift" %}
11956 ins_encode %{
11957 __ salq($dst$$Register, $shift$$constant);
11958 %}
11959 ins_pipe(ialu_reg);
11960 %}
11961
11962 // Shift Left by 8-bit immediate, NDD (new data destination) long form: dst and src are separate operands.
// Selected only when UseAPX is on.
11963 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
11964 %{
11965 predicate(UseAPX);
11966 match(Set dst (LShiftL src shift));
11967 effect(KILL cr); // the flags operand is declared clobbered
11968 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
11969
11970 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
11971 ins_encode %{
11972 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
11973 %}
11974 ins_pipe(ialu_reg);
11975 %}
11976
11977 // Shift Left by 8-bit immediate, read-modify-write on memory: matches a StoreL of
// (LShiftL (LoadL dst) shift) back to the same memory operand. No predicate is attached.
11978 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11979 %{
11980 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
11981 effect(KILL cr); // the flags operand is declared clobbered
11982
11983 format %{ "salq $dst, $shift" %}
11984 ins_encode %{
11985 __ salq($dst$$Address, $shift$$constant);
11986 %}
11987 ins_pipe(ialu_mem_imm);
11988 %}
11989
11990 // Shift Left by variable count: two-operand long form used when BMI2 is not available.
// The count operand is an int restricted to the rcx_RegI class; the encoding names only dst,
// so the count register is implicit (CL, per the instruct name).
11991 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
11992 %{
11993 predicate(!VM_Version::supports_bmi2());
11994 match(Set dst (LShiftL dst shift));
11995 effect(KILL cr); // the flags operand is declared clobbered
11996
11997 format %{ "salq $dst, $shift" %}
11998 ins_encode %{
11999 __ salq($dst$$Register);
12000 %}
12001 ins_pipe(ialu_reg_reg);
12002 %}
12003
12004 // Shift Left by variable count, read-modify-write on memory, used when BMI2 is not available.
// Matches a StoreL of (LShiftL (LoadL dst) shift) back to the same memory operand; the count
// operand is restricted to rcx_RegI and is implicit in the encoding.
12005 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12006 %{
12007 predicate(!VM_Version::supports_bmi2());
12008 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12009 effect(KILL cr); // the flags operand is declared clobbered
12010
12011 format %{ "salq $dst, $shift" %}
12012 ins_encode %{
12013 __ salq($dst$$Address);
12014 %}
12015 ins_pipe(ialu_mem_reg);
12016 %}
12017
// Shift Left by variable count, BMI2 long form: dst and src are rRegL, the count is any rRegI.
// No flags operand is declared, so nothing is marked as killed.
12018 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12019 %{
12020 predicate(VM_Version::supports_bmi2());
12021 match(Set dst (LShiftL src shift));
12022
12023 format %{ "shlxq $dst, $src, $shift" %}
12024 ins_encode %{
12025 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12026 %}
12027 ins_pipe(ialu_reg_reg);
12028 %}
12029
// Shift Left by variable count, BMI2 long form with the shifted value loaded from memory:
// folds the LoadL into the shift. Carries an explicit ins_cost of 175.
12030 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12031 %{
12032 predicate(VM_Version::supports_bmi2());
12033 match(Set dst (LShiftL (LoadL src) shift));
12034 ins_cost(175);
12035 format %{ "shlxq $dst, $src, $shift" %}
12036 ins_encode %{
12037 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12038 %}
12039 ins_pipe(ialu_reg_mem);
12040 %}
12041
12042 // Arithmetic Shift Right by immediate: two-operand long form, dst is both the shifted input and the result.
// The count is a full immI operand; the encoding keeps only its low 6 bits (& 0x3F).
// Selected only when UseAPX is off; sarL_rReg_imm_ndd is the UseAPX counterpart.
12043 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12044 %{
12045 predicate(!UseAPX);
12046 match(Set dst (RShiftL dst shift));
12047 effect(KILL cr); // the flags operand is declared clobbered
12048
12049 format %{ "sarq $dst, $shift" %}
12050 ins_encode %{
12051 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12052 %}
12053 ins_pipe(ialu_reg); // register-only operation: same pipe class as salL_rReg_imm and shrL_rReg_imm
12054 %}
12055
12056 // Arithmetic Shift Right by immediate, NDD (new data destination) long form: dst and src are separate operands.
// The count is a full immI operand; the encoding keeps only its low 6 bits (& 0x3F).
// Selected only when UseAPX is on.
12057 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12058 %{
12059 predicate(UseAPX);
12060 match(Set dst (RShiftL src shift));
12061 effect(KILL cr); // the flags operand is declared clobbered
12062 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
12063
12064 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12065 ins_encode %{
12066 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false); // NOTE(review): trailing false is presumably the no_flags option; confirm
12067 %}
12068 ins_pipe(ialu_reg); // register-only operation: same pipe class as salL_rReg_imm_ndd and shrL_rReg_imm_ndd
12069 %}
12070
12071 // Arithmetic Shift Right by immediate, read-modify-write on memory: matches a StoreL of
// (RShiftL (LoadL dst) shift) back to the same memory operand. The count is a full immI
// operand; the encoding keeps only its low 6 bits (& 0x3F). No predicate is attached.
12072 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12073 %{
12074 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12075 effect(KILL cr); // the flags operand is declared clobbered
12076
12077 format %{ "sarq $dst, $shift" %}
12078 ins_encode %{
12079 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12080 %}
12081 ins_pipe(ialu_mem_imm);
12082 %}
12083
12084 // Arithmetic Shift Right by variable count: two-operand long form used when BMI2 is not available.
// The count operand is an int restricted to the rcx_RegI class; the encoding names only dst,
// so the count register is implicit (CL, per the instruct name).
12085 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12086 %{
12087 predicate(!VM_Version::supports_bmi2());
12088 match(Set dst (RShiftL dst shift));
12089 effect(KILL cr); // the flags operand is declared clobbered
12090
12091 format %{ "sarq $dst, $shift" %}
12092 ins_encode %{
12093 __ sarq($dst$$Register);
12094 %}
12095 ins_pipe(ialu_reg_reg);
12096 %}
12097
12098 // Arithmetic Shift Right by variable count, read-modify-write on memory, used when BMI2 is not available.
// Matches a StoreL of (RShiftL (LoadL dst) shift) back to the same memory operand; the count
// operand is restricted to rcx_RegI and is implicit in the encoding.
12099 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12100 %{
12101 predicate(!VM_Version::supports_bmi2());
12102 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12103 effect(KILL cr); // the flags operand is declared clobbered
12104
12105 format %{ "sarq $dst, $shift" %}
12106 ins_encode %{
12107 __ sarq($dst$$Address);
12108 %}
12109 ins_pipe(ialu_mem_reg);
12110 %}
12111
// Arithmetic Shift Right by variable count, BMI2 long form: dst and src are rRegL, the count is
// any rRegI. No flags operand is declared, so nothing is marked as killed.
12112 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12113 %{
12114 predicate(VM_Version::supports_bmi2());
12115 match(Set dst (RShiftL src shift));
12116
12117 format %{ "sarxq $dst, $src, $shift" %}
12118 ins_encode %{
12119 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12120 %}
12121 ins_pipe(ialu_reg_reg);
12122 %}
12123
// Arithmetic Shift Right by variable count, BMI2 long form with the shifted value loaded from
// memory: folds the LoadL into the shift. Carries an explicit ins_cost of 175.
12124 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12125 %{
12126 predicate(VM_Version::supports_bmi2());
12127 match(Set dst (RShiftL (LoadL src) shift));
12128 ins_cost(175);
12129 format %{ "sarxq $dst, $src, $shift" %}
12130 ins_encode %{
12131 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12132 %}
12133 ins_pipe(ialu_reg_mem);
12134 %}
12135
12136 // Logical Shift Right by 8-bit immediate: two-operand long form, dst is both the shifted input and the result.
// Selected only when UseAPX is off; shrL_rReg_imm_ndd is the UseAPX counterpart.
12137 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12138 %{
12139 predicate(!UseAPX);
12140 match(Set dst (URShiftL dst shift));
12141 effect(KILL cr); // the flags operand is declared clobbered
12142
12143 format %{ "shrq $dst, $shift" %}
12144 ins_encode %{
12145 __ shrq($dst$$Register, $shift$$constant);
12146 %}
12147 ins_pipe(ialu_reg);
12148 %}
12149
12150 // Logical Shift Right by 8-bit immediate, NDD (new data destination) long form: dst and src are separate operands.
// Selected only when UseAPX is on.
12151 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12152 %{
12153 predicate(UseAPX);
12154 match(Set dst (URShiftL src shift));
12155 effect(KILL cr); // the flags operand is declared clobbered
12156 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
12157
12158 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12159 ins_encode %{
12160 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
12161 %}
12162 ins_pipe(ialu_reg);
12163 %}
12164
12165 // Logical Shift Right by 8-bit immediate, read-modify-write on memory: matches a StoreL of
// (URShiftL (LoadL dst) shift) back to the same memory operand. No predicate is attached.
12166 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12167 %{
12168 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12169 effect(KILL cr); // the flags operand is declared clobbered
12170
12171 format %{ "shrq $dst, $shift" %}
12172 ins_encode %{
12173 __ shrq($dst$$Address, $shift$$constant);
12174 %}
12175 ins_pipe(ialu_mem_imm);
12176 %}
12177
12178 // Logical Shift Right by variable count: two-operand long form used when BMI2 is not available.
// The count operand is an int restricted to the rcx_RegI class; the encoding names only dst,
// so the count register is implicit (CL, per the instruct name).
12179 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12180 %{
12181 predicate(!VM_Version::supports_bmi2());
12182 match(Set dst (URShiftL dst shift));
12183 effect(KILL cr); // the flags operand is declared clobbered
12184
12185 format %{ "shrq $dst, $shift" %}
12186 ins_encode %{
12187 __ shrq($dst$$Register);
12188 %}
12189 ins_pipe(ialu_reg_reg);
12190 %}
12191
12192 // Logical Shift Right by variable count, read-modify-write on memory, used when BMI2 is not available.
// Matches a StoreL of (URShiftL (LoadL dst) shift) back to the same memory operand; the count
// operand is restricted to rcx_RegI and is implicit in the encoding.
12193 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12194 %{
12195 predicate(!VM_Version::supports_bmi2());
12196 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12197 effect(KILL cr); // the flags operand is declared clobbered
12198
12199 format %{ "shrq $dst, $shift" %}
12200 ins_encode %{
12201 __ shrq($dst$$Address);
12202 %}
12203 ins_pipe(ialu_mem_reg);
12204 %}
12205
// Logical Shift Right by variable count, BMI2 long form: dst and src are rRegL, the count is
// any rRegI. No flags operand is declared, so nothing is marked as killed.
12206 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12207 %{
12208 predicate(VM_Version::supports_bmi2());
12209 match(Set dst (URShiftL src shift));
12210
12211 format %{ "shrxq $dst, $src, $shift" %}
12212 ins_encode %{
12213 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12214 %}
12215 ins_pipe(ialu_reg_reg);
12216 %}
12217
// Logical Shift Right by variable count, BMI2 long form with the shifted value loaded from
// memory: folds the LoadL into the shift. Carries an explicit ins_cost of 175.
12218 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12219 %{
12220 predicate(VM_Version::supports_bmi2());
12221 match(Set dst (URShiftL (LoadL src) shift));
12222 ins_cost(175);
12223 format %{ "shrxq $dst, $src, $shift" %}
12224 ins_encode %{
12225 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12226 %}
12227 ins_pipe(ialu_reg_mem);
12228 %}
12229
12230 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12231 // This idiom is used by the compiler for the i2b bytecode.
// The whole (RShiftI (LShiftI src 24) 24) tree is replaced by a single movsbl;
// no flags operand is declared.
12232 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12233 %{
12234 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); // the same constant operand is used for both shifts
12235
12236 format %{ "movsbl $dst, $src\t# i2b" %}
12237 ins_encode %{
12238 __ movsbl($dst$$Register, $src$$Register);
12239 %}
12240 ins_pipe(ialu_reg_reg);
12241 %}
12242
12243 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12244 // This idiom is used by the compiler for the i2s bytecode.
// The whole (RShiftI (LShiftI src 16) 16) tree is replaced by a single movswl;
// no flags operand is declared.
12245 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12246 %{
12247 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); // the same constant operand is used for both shifts
12248
12249 format %{ "movswl $dst, $src\t# i2s" %}
12250 ins_encode %{
12251 __ movswl($dst$$Register, $src$$Register);
12252 %}
12253 ins_pipe(ialu_reg_reg);
12254 %}
12255
12256 // ROL/ROR instructions
12257
12258 // Rotate left by constant: two-operand int form used when BMI2 is not available.
// RotateLeft is shared by int and long nodes, so the predicate also checks that the
// node's result type is T_INT.
12259 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12260 %{
12261 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12262 match(Set dst (RotateLeft dst shift));
12263 effect(KILL cr); // the flags operand is declared clobbered
12264 format %{ "roll $dst, $shift" %}
12265 ins_encode %{
12266 __ roll($dst$$Register, $shift$$constant);
12267 %}
12268 ins_pipe(ialu_reg);
12269 %}
12270
// Rotate left by constant, BMI2 int form (UseAPX off): emitted as a rotate right (rorxl) by the
// complementary count 32 - (shift & 31). The format string prints "rolxl" although the
// instruction emitted is rorxl. No flags operand is declared.
12271 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12272 %{
12273 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12274 match(Set dst (RotateLeft src shift));
12275 format %{ "rolxl $dst, $src, $shift" %}
12276 ins_encode %{
12277 int shift = 32 - ($shift$$constant & 31); // yields 32 when the masked count is 0
12278 __ rorxl($dst$$Register, $src$$Register, shift);
12279 %}
12280 ins_pipe(ialu_reg_reg);
12281 %}
12282
// Rotate left by constant, BMI2 int form with the rotated value loaded from memory: folds the
// LoadI and is emitted as rorxl by the complementary count 32 - (shift & 31). The format string
// prints "rolxl" although the instruction emitted is rorxl. Carries an explicit ins_cost of 175.
12283 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12284 %{
12285 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12286 match(Set dst (RotateLeft (LoadI src) shift));
12287 ins_cost(175);
12288 format %{ "rolxl $dst, $src, $shift" %}
12289 ins_encode %{
12290 int shift = 32 - ($shift$$constant & 31); // yields 32 when the masked count is 0
12291 __ rorxl($dst$$Register, $src$$Address, shift);
12292 %}
12293 ins_pipe(ialu_reg_mem);
12294 %}
12295
12296 // Rotate Left by variable count: two-operand int form, selected when UseAPX is off and the
// node's result type is T_INT. The count operand is restricted to rcx_RegI and is implicit
// in the encoding, which names only dst.
12297 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12298 %{
12299 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12300 match(Set dst (RotateLeft dst shift));
12301 effect(KILL cr); // the flags operand is declared clobbered
12302 format %{ "roll $dst, $shift" %}
12303 ins_encode %{
12304 __ roll($dst$$Register);
12305 %}
12306 ins_pipe(ialu_reg_reg);
12307 %}
12308
12309 // Rotate Left by variable count, NDD (new data destination) int form: dst and src are separate
// operands; selected when UseAPX is on and the node's result type is T_INT. The count operand
// is restricted to rcx_RegI and is implicit in the encoding.
12310 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12311 %{
12312 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12313 match(Set dst (RotateLeft src shift));
12314 effect(KILL cr); // the flags operand is declared clobbered
12315 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
12316
12317 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12318 ins_encode %{
12319 __ eroll($dst$$Register, $src$$Register, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
12320 %}
12321 ins_pipe(ialu_reg_reg);
12322 %}
12323
12324 // Rotate Right by constant: two-operand int form used when BMI2 is not available.
// RotateRight is shared by int and long nodes, so the predicate also checks that the
// node's result type is T_INT.
12325 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12326 %{
12327 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12328 match(Set dst (RotateRight dst shift));
12329 effect(KILL cr); // the flags operand is declared clobbered
12330 format %{ "rorl $dst, $shift" %}
12331 ins_encode %{
12332 __ rorl($dst$$Register, $shift$$constant);
12333 %}
12334 ins_pipe(ialu_reg);
12335 %}
12336
12337 // Rotate Right by constant, BMI2 int form (UseAPX off): separate dst and src registers, the
// constant is passed to rorxl unchanged. No flags operand is declared.
12338 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12339 %{
12340 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12341 match(Set dst (RotateRight src shift));
12342 format %{ "rorxl $dst, $src, $shift" %}
12343 ins_encode %{
12344 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12345 %}
12346 ins_pipe(ialu_reg_reg);
12347 %}
12348
// Rotate Right by constant, BMI2 int form with the rotated value loaded from memory: folds the
// LoadI into rorxl. Carries an explicit ins_cost of 175. No flags operand is declared.
12349 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12350 %{
12351 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12352 match(Set dst (RotateRight (LoadI src) shift));
12353 ins_cost(175);
12354 format %{ "rorxl $dst, $src, $shift" %}
12355 ins_encode %{
12356 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12357 %}
12358 ins_pipe(ialu_reg_mem);
12359 %}
12360
12361 // Rotate Right by variable count: two-operand int form, selected when UseAPX is off and the
// node's result type is T_INT. The count operand is restricted to rcx_RegI and is implicit
// in the encoding, which names only dst.
12362 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12363 %{
12364 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12365 match(Set dst (RotateRight dst shift));
12366 effect(KILL cr); // the flags operand is declared clobbered
12367 format %{ "rorl $dst, $shift" %}
12368 ins_encode %{
12369 __ rorl($dst$$Register);
12370 %}
12371 ins_pipe(ialu_reg_reg);
12372 %}
12373
12374 // Rotate Right by variable count, NDD (new data destination) int form: dst and src are separate
// operands; selected when UseAPX is on and the node's result type is T_INT. The count operand
// is restricted to rcx_RegI and is implicit in the encoding.
12375 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12376 %{
12377 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12378 match(Set dst (RotateRight src shift));
12379 effect(KILL cr); // the flags operand is declared clobbered
12380 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
12381
12382 format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
12383 ins_encode %{
12384 __ erorl($dst$$Register, $src$$Register, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
12385 %}
12386 ins_pipe(ialu_reg_reg);
12387 %}
12388
12389 // Rotate Left by constant: two-operand long form used when BMI2 is not available.
// RotateLeft is shared by int and long nodes, so the predicate also checks that the
// node's result type is T_LONG.
12390 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12391 %{
12392 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12393 match(Set dst (RotateLeft dst shift));
12394 effect(KILL cr); // the flags operand is declared clobbered
12395 format %{ "rolq $dst, $shift" %}
12396 ins_encode %{
12397 __ rolq($dst$$Register, $shift$$constant);
12398 %}
12399 ins_pipe(ialu_reg);
12400 %}
12401
// Rotate left by constant, BMI2 long form (UseAPX off): emitted as a rotate right (rorxq) by the
// complementary count 64 - (shift & 63). The format string prints "rolxq" although the
// instruction emitted is rorxq. No flags operand is declared.
12402 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12403 %{
12404 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12405 match(Set dst (RotateLeft src shift));
12406 format %{ "rolxq $dst, $src, $shift" %}
12407 ins_encode %{
12408 int shift = 64 - ($shift$$constant & 63); // yields 64 when the masked count is 0
12409 __ rorxq($dst$$Register, $src$$Register, shift);
12410 %}
12411 ins_pipe(ialu_reg_reg);
12412 %}
12413
// Rotate left by constant, BMI2 long form with the rotated value loaded from memory: folds the
// LoadL and is emitted as rorxq by the complementary count 64 - (shift & 63). The format string
// prints "rolxq" although the instruction emitted is rorxq. Carries an explicit ins_cost of 175.
12414 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12415 %{
12416 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12417 match(Set dst (RotateLeft (LoadL src) shift));
12418 ins_cost(175);
12419 format %{ "rolxq $dst, $src, $shift" %}
12420 ins_encode %{
12421 int shift = 64 - ($shift$$constant & 63); // yields 64 when the masked count is 0
12422 __ rorxq($dst$$Register, $src$$Address, shift);
12423 %}
12424 ins_pipe(ialu_reg_mem);
12425 %}
12426
12427 // Rotate Left by variable count: two-operand long form, selected when UseAPX is off and the
// node's result type is T_LONG. The count operand is restricted to rcx_RegI and is implicit
// in the encoding, which names only dst.
12428 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12429 %{
12430 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12431 match(Set dst (RotateLeft dst shift));
12432 effect(KILL cr); // the flags operand is declared clobbered
12433
12434 format %{ "rolq $dst, $shift" %}
12435 ins_encode %{
12436 __ rolq($dst$$Register);
12437 %}
12438 ins_pipe(ialu_reg_reg);
12439 %}
12440
12441 // Rotate Left by variable count, NDD (new data destination) long form: dst and src are separate
// operands; selected when UseAPX is on and the node's result type is T_LONG. The count operand
// is restricted to rcx_RegI and is implicit in the encoding.
12442 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12443 %{
12444 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12445 match(Set dst (RotateLeft src shift));
12446 effect(KILL cr); // the flags operand is declared clobbered
12447 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
12448
12449 format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
12450 ins_encode %{
12451 __ erolq($dst$$Register, $src$$Register, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
12452 %}
12453 ins_pipe(ialu_reg_reg);
12454 %}
12455
12456 // Rotate Right by constant: two-operand long form used when BMI2 is not available.
// RotateRight is shared by int and long nodes, so the predicate also checks that the
// node's result type is T_LONG.
12457 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12458 %{
12459 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12460 match(Set dst (RotateRight dst shift));
12461 effect(KILL cr); // the flags operand is declared clobbered
12462 format %{ "rorq $dst, $shift" %}
12463 ins_encode %{
12464 __ rorq($dst$$Register, $shift$$constant);
12465 %}
12466 ins_pipe(ialu_reg);
12467 %}
12468
12469 // Rotate Right by constant, BMI2 long form: separate dst and src registers, the constant is
// passed to rorxq unchanged. No flags operand is declared.
// NOTE(review): the sibling register forms rolI_immI8, rorI_immI8 and rolL_immI8 also require
// !UseAPX in their predicates; this one does not. Confirm whether the difference is intentional.
12470 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12471 %{
12472 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12473 match(Set dst (RotateRight src shift));
12474 format %{ "rorxq $dst, $src, $shift" %}
12475 ins_encode %{
12476 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12477 %}
12478 ins_pipe(ialu_reg_reg);
12479 %}
12480
// Rotate Right by constant, BMI2 long form with the rotated value loaded from memory: folds the
// LoadL into rorxq. Carries an explicit ins_cost of 175. No flags operand is declared.
12481 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12482 %{
12483 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12484 match(Set dst (RotateRight (LoadL src) shift));
12485 ins_cost(175);
12486 format %{ "rorxq $dst, $src, $shift" %}
12487 ins_encode %{
12488 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12489 %}
12490 ins_pipe(ialu_reg_mem);
12491 %}
12492
12493 // Rotate Right by variable count: two-operand long form, selected when UseAPX is off and the
// node's result type is T_LONG. The count operand is restricted to rcx_RegI and is implicit
// in the encoding, which names only dst.
12494 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12495 %{
12496 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12497 match(Set dst (RotateRight dst shift));
12498 effect(KILL cr); // the flags operand is declared clobbered
12499 format %{ "rorq $dst, $shift" %}
12500 ins_encode %{
12501 __ rorq($dst$$Register);
12502 %}
12503 ins_pipe(ialu_reg_reg);
12504 %}
12505
12506 // Rotate Right by variable count, NDD (new data destination) long form: dst and src are separate
// operands; selected when UseAPX is on and the node's result type is T_LONG. The count operand
// is restricted to rcx_RegI and is implicit in the encoding.
12507 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12508 %{
12509 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12510 match(Set dst (RotateRight src shift));
12511 effect(KILL cr); // the flags operand is declared clobbered
12512 flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably allows demotion to the two-operand form when dst == src; confirm
12513
12514 format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
12515 ins_encode %{
12516 __ erorq($dst$$Register, $src$$Register, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
12517 %}
12518 ins_pipe(ialu_reg_reg);
12519 %}
12520
12521 //----------------------------- CompressBits/ExpandBits ------------------------
12522
// CompressBits on long values, register mask: emitted as pextq dst, src, mask.
// Applies only when the node's type is a long; no flags operand is declared.
12523 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12524 predicate(n->bottom_type()->isa_long());
12525 match(Set dst (CompressBits src mask));
12526 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12527 ins_encode %{
12528 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12529 %}
12530 ins_pipe( pipe_slow );
12531 %}
12532
// ExpandBits on long values, register mask: emitted as pdepq dst, src, mask.
// Applies only when the node's type is a long; no flags operand is declared.
12533 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12534 predicate(n->bottom_type()->isa_long());
12535 match(Set dst (ExpandBits src mask));
12536 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12537 ins_encode %{
12538 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12539 %}
12540 ins_pipe( pipe_slow );
12541 %}
12542
// CompressBits on long values with the mask loaded from memory: folds the LoadL of the mask
// into pextq. Applies only when the node's type is a long; no flags operand is declared.
12543 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12544 predicate(n->bottom_type()->isa_long());
12545 match(Set dst (CompressBits src (LoadL mask)));
12546 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12547 ins_encode %{
12548 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12549 %}
12550 ins_pipe( pipe_slow );
12551 %}
12552
// ExpandBits on long values with the mask loaded from memory: folds the LoadL of the mask
// into pdepq. Applies only when the node's type is a long; no flags operand is declared.
12553 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12554 predicate(n->bottom_type()->isa_long());
12555 match(Set dst (ExpandBits src (LoadL mask)));
12556 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12557 ins_encode %{
12558 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12559 %}
12560 ins_pipe( pipe_slow );
12561 %}
12562
12563
12564 // Logical Instructions
12565
12566 // Integer Logical Instructions
12567
12568 // And Instructions
12569 // And Register with Register: two-operand int form, dst is both the first input and the result.
// Selected only when UseAPX is off; andI_rReg_ndd is the UseAPX counterpart.
12570 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12571 %{
12572 predicate(!UseAPX);
12573 match(Set dst (AndI dst src));
12574 effect(KILL cr); // the flags operand is declared clobbered
// Records which condition flags the instruction sets or clears.
12575 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12576
12577 format %{ "andl $dst, $src\t# int" %}
12578 ins_encode %{
12579 __ andl($dst$$Register, $src$$Register);
12580 %}
12581 ins_pipe(ialu_reg_reg);
12582 %}
12583
12584 // And Register with Register using New Data Destination (NDD): dst, src1 and src2 are
// separate operands. Selected only when UseAPX is on.
12585 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12586 %{
12587 predicate(UseAPX);
12588 match(Set dst (AndI src1 src2));
12589 effect(KILL cr); // the flags operand is declared clobbered
// Records which condition flags are set or cleared, and tags both source operands as
// ndd_demotable. NOTE(review): presumably either source may coincide with dst to allow
// demotion to the two-operand form; confirm.
12590 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12591
12592 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12593 ins_encode %{
12594 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
12595
12596 %}
12597 ins_pipe(ialu_reg_reg);
12598 %}
12599
12600 // And Register with Immediate 255: emitted as movzbl dst, src instead of an and.
// No predicate is attached and no flags operand is declared.
12601 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12602 %{
12603 match(Set dst (AndI src mask));
12604
12605 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12606 ins_encode %{
12607 __ movzbl($dst$$Register, $src$$Register);
12608 %}
12609 ins_pipe(ialu_reg);
12610 %}
12611
12612 // And Register with Immediate 255 and promote to long: the ConvI2L is folded into the match,
// so a single movzbl produces the long result. No flags operand is declared.
12613 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12614 %{
12615 match(Set dst (ConvI2L (AndI src mask)));
12616
12617 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12618 ins_encode %{
12619 __ movzbl($dst$$Register, $src$$Register);
12620 %}
12621 ins_pipe(ialu_reg);
12622 %}
12623
12624 // And Register with Immediate 65535: emitted as movzwl dst, src instead of an and.
// No predicate is attached and no flags operand is declared.
12625 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12626 %{
12627 match(Set dst (AndI src mask));
12628
12629 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12630 ins_encode %{
12631 __ movzwl($dst$$Register, $src$$Register);
12632 %}
12633 ins_pipe(ialu_reg);
12634 %}
12635
12636 // And Register with Immediate 65535 and promote to long: the ConvI2L is folded into the match,
// so a single movzwl produces the long result. No flags operand is declared.
12637 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12638 %{
12639 match(Set dst (ConvI2L (AndI src mask)));
12640
12641 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12642 ins_encode %{
12643 __ movzwl($dst$$Register, $src$$Register);
12644 %}
12645 ins_pipe(ialu_reg);
12646 %}
12647
12648 // Can skip int2long conversions after AND with small bitmask
// BMI2 only. The mask operand is immI_Pow2M1 (a power of two minus one, per the operand name),
// so exact_log2(mask + 1) is loaded into tmp and passed to bzhiq as the index operand.
// Carries an explicit ins_cost of 125.
12649 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12650 %{
12651 predicate(VM_Version::supports_bmi2());
12652 ins_cost(125);
12653 effect(TEMP tmp, KILL cr); // tmp is a scratch register; the flags operand is declared clobbered
12654 match(Set dst (ConvI2L (AndI src mask)));
12655 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12656 ins_encode %{
12657 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12658 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12659 %}
12660 ins_pipe(ialu_reg_reg);
12661 %}
12662
12663 // And Register with Immediate: two-operand int form, dst is both the first input and the result.
// Selected only when UseAPX is off; andI_rReg_rReg_imm_ndd is the UseAPX counterpart.
12664 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12665 %{
12666 predicate(!UseAPX);
12667 match(Set dst (AndI dst src));
12668 effect(KILL cr); // the flags operand is declared clobbered
// Records which condition flags the instruction sets or clears.
12669 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12670
12671 format %{ "andl $dst, $src\t# int" %}
12672 ins_encode %{
12673 __ andl($dst$$Register, $src$$constant);
12674 %}
12675 ins_pipe(ialu_reg);
12676 %}
12677
// And Register with Immediate using New Data Destination (NDD): dst and src1 are separate
// registers, src2 is an immI constant. Selected only when UseAPX is on.
12678 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12679 %{
12680 predicate(UseAPX);
12681 match(Set dst (AndI src1 src2));
12682 effect(KILL cr); // the flags operand is declared clobbered
// Records which condition flags are set or cleared, and tags operand 1 as ndd_demotable.
// NOTE(review): presumably allows demotion to the two-operand form when dst == src1; confirm.
12683 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
12684
12685 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12686 ins_encode %{
12687 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false); // NOTE(review): trailing false is presumably the no_flags option; confirm
12688 %}
12689 ins_pipe(ialu_reg);
12690 %}
12691
12692 // And Register with Memory: folds the LoadI of the second input into the and; dst is both
// the first input and the result. No predicate is attached. Carries an explicit ins_cost of 150.
12693 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12694 %{
12695 match(Set dst (AndI dst (LoadI src)));
12696 effect(KILL cr); // the flags operand is declared clobbered
// Records which condition flags the instruction sets or clears.
12697 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12698
12699 ins_cost(150);
12700 format %{ "andl $dst, $src\t# int" %}
12701 ins_encode %{
12702 __ andl($dst$$Register, $src$$Address);
12703 %}
12704 ins_pipe(ialu_reg_mem);
12705 %}
12706
12707 // And Memory with Register, byte-sized read-modify-write: matches a StoreB of
// (AndI (LoadB dst) src) back to the same memory operand and emits a single andb.
// Carries an explicit ins_cost of 150.
12708 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12709 %{
12710 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12711 effect(KILL cr); // the flags operand is declared clobbered
// Records which condition flags the instruction sets or clears.
12712 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12713
12714 ins_cost(150);
12715 format %{ "andb $dst, $src\t# byte" %}
12716 ins_encode %{
12717 __ andb($dst$$Address, $src$$Register);
12718 %}
12719 ins_pipe(ialu_mem_reg);
12720 %}
12721
// Int read-modify-write: LoadI / AndI / StoreI on the same address is
// matched as a single andl with a memory destination.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ andl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
12735
// Int read-modify-write with an immediate mask: LoadI / AndI / StoreI on
// the same address is matched as a single andl [mem], imm.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12750
12751 // BMI1 instructions
// Matches (src1 ^ -1) & [src2], i.e. an and-not with a memory operand,
// and emits a single andnl. Requires BMI1 and AVX support.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rinv = $src1$$Register;  // the operand that is complemented
    __ andnl(Rdst, Rinv, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12766
// Matches (src1 ^ -1) & src2 on registers and emits a single andnl.
// Requires BMI1 and AVX support.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rinv = $src1$$Register;  // the operand that is complemented
    Register Rand = $src2$$Register;
    __ andnl(Rdst, Rinv, Rand);
  %}
  ins_pipe(ialu_reg);
%}
12780
// Matches (0 - src) & src on a register and emits a single blsil.
// Requires BMI1 and AVX support.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsil(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
12794
// Matches (0 - [src]) & [src] where both loads use the same memory operand,
// and emits a single blsil with a memory source. Requires BMI1 and AVX.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ blsil(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12809
// Matches ([src] + -1) ^ [src] where both loads use the same memory operand,
// and emits a single blsmskl with a memory source. Requires BMI1 and AVX.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ blsmskl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12825
// Matches (src + -1) ^ src on a register and emits a single blsmskl.
// Requires BMI1 and AVX support.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsmskl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
12841
// Matches (src + -1) & src on a register and emits a single blsrl.
// Requires BMI1 and AVX support.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-only form: use the register pipeline class, as the other
  // register-register BMI1 instructs (blsil, blsmskl) do. The previous
  // ialu_reg_mem class described a memory operand this form does not have.
  ins_pipe(ialu_reg);
%}
12857
// Matches ([src] + -1) & [src] where both loads use the same memory operand,
// and emits a single blsrl with a memory source. Requires BMI1 and AVX.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory-source form: use the register/memory pipeline class, as the other
  // memory-source BMI1 instructs (blsil, blsmskl) do. The previous ialu_reg
  // class did not account for the load.
  ins_pipe(ialu_reg_mem);
%}
12874
12875 // Or Instructions
// OrI of two registers, two-operand form (dst |= src).
// Selected only when UseAPX is off; the flags register operand is killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ orl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
12890
// OrI of two registers, three-operand New Data Destination (NDD) form
// (dst = src1 | src2). Selected only when UseAPX is on.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ eorl(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12905
// OrI of a register with an int immediate, two-operand form (dst |= src).
// Selected only when UseAPX is off.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12920
// OrI of a register with an int immediate, three-operand NDD form
// (dst = src1 | src2). Selected only when UseAPX is on.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ eorl(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12934
// NDD OrI with the immediate as the first ideal input (src1) and the
// register as the second (src2). The encoding passes the register first
// and the constant second. Selected only when UseAPX is on.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src2$$Register;
    __ eorl(Rdst, Rsrc, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12948
// OrI of a register with an int loaded from memory (dst |= [src]).
// The LoadI is folded into the instruction's memory operand.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12963
// Byte read-modify-write: LoadB / OrI / StoreB on the same address is
// matched as a single orb with a memory destination.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orb($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
12978
// Int read-modify-write: LoadI / OrI / StoreI on the same address is
// matched as a single orl with a memory destination.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
12992
// Int read-modify-write with an immediate: LoadI / OrI / StoreI on the
// same address is matched as a single orl [mem], imm.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13007
13008 // Xor Instructions
// XorI of two registers, two-operand form (dst ^= src).
// Selected only when UseAPX is off; the flags register operand is killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13023
// XorI of two registers, three-operand New Data Destination (NDD) form
// (dst = src1 ^ src2). Selected only when UseAPX is on.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ exorl(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13038
// XorI of a register with the constant -1 is emitted as notl on dst.
// No flags-register operand is declared or killed by this rule.
// Selected only when UseAPX is off.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ notl(Rdst);
  %}
  ins_pipe(ialu_reg);
%}
13051
// NDD form of XorI with the constant -1: emitted as enotl dst, src.
// No flags-register operand is declared or killed by this rule.
// Selected only when UseAPX is on.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm) %{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ enotl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13064
// XorI of a register with an int immediate other than -1, two-operand form
// (dst ^= src). Selected only when UseAPX is off.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  // The constant -1 is excluded explicitly so that xorI_rReg_im1 is chosen
  // for it regardless of the relative instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13080
// NDD XorI of a register with an int immediate other than -1
// (dst = src1 ^ src2). Selected only when UseAPX is on.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  // The constant -1 is excluded explicitly so that xorI_rReg_im1_ndd is
  // chosen for it regardless of the relative instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ exorl(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13095
// XorI of a register with an int loaded from memory (dst ^= [src]).
// The LoadI is folded into the instruction's memory operand.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13110
// Byte read-modify-write: LoadB / XorI / StoreB on the same address is
// matched as a single xorb with a memory destination.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorb($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13125
// Int read-modify-write: LoadI / XorI / StoreI on the same address is
// matched as a single xorl with a memory destination.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13139
// Int read-modify-write with an immediate: LoadI / XorI / StoreI on the
// same address is matched as a single xorl [mem], imm.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13154
13155
13156 // Long Logical Instructions
13157
13158 // And Instructions
// AndL of two registers, two-operand form (dst &= src).
// Selected only when UseAPX is off; the flags register operand is killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ andq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13173
// AndL of two registers, three-operand New Data Destination (NDD) form
// (dst = src1 & src2). Selected only when UseAPX is on.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ eandq(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13189
// AndL with the constant 255 is emitted as a byte zero-extending move
// instead of an and; no flags-register operand is declared for this rule.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // The 32-bit movzbl suffices: it clears the upper 32 bits of dst, so
    // no REX.W form is required.
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movzbl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13202
// AndL with the constant 65535 is emitted as a 16-bit zero-extending move
// instead of an and; no flags-register operand is declared for this rule.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // The 32-bit movzwl suffices: it clears the upper 32 bits of dst, so
    // no REX.W form is required.
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movzwl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13215
// AndL of a register with an immL32 immediate, two-operand form
// (dst &= src). Selected only when UseAPX is off.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13230
// AndL of a register with an immL32 immediate, three-operand NDD form
// (dst = src1 & src2). Selected only when UseAPX is on.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ eandq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13244
// AndL of a register with a long loaded from memory (dst &= [src]).
// The LoadL is folded into the instruction's memory operand.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13259
// Long read-modify-write: LoadL / AndL / StoreL on the same address is
// matched as a single andq with a memory destination.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ andq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13274
// Long read-modify-write with an immL32 mask: LoadL / AndL / StoreL on the
// same address is matched as a single andq [mem], imm.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13289
// Long read-modify-write AndL whose mask has exactly one zero bit
// (immL_NotPow2): emitted as btrq on the memory operand, with the bit
// index computed as log2 of the complemented mask.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  // Restrict this rule to masks whose cleared bit is above bit 30, i.e. a
  // genuinely 64-bit immediate given that not(con) is a power of 2; the
  // plain AND/OR forms work well enough for 8/32-bit values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    const int bit_index = log2i_exact((julong)~$con$$constant);
    __ btrq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13306
13307 // BMI1 instructions
// Matches (src1 ^ -1) & [src2] on longs, i.e. an and-not with a memory
// operand, and emits a single andnq. Requires BMI1 and AVX support.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andnq $dst, $src1, $src2" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rinv = $src1$$Register;  // the operand that is complemented
    __ andnq(Rdst, Rinv, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13322
// Matches (src1 ^ -1) & src2 on long registers and emits a single andnq.
// Requires BMI1 and AVX support.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // Register-only form: use the same pipeline class as the int counterpart
  // andnI_rReg_rReg_rReg. The previous ialu_reg_mem class described a memory
  // operand this form does not have.
  ins_pipe(ialu_reg);
%}
13336
// Matches (0 - src) & src on a long register and emits a single blsiq.
// Requires BMI1 and AVX support.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsiq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13350
// Matches (0 - [src]) & [src] on longs where both loads use the same memory
// operand, and emits a single blsiq with a memory source. Requires BMI1
// and AVX support.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ blsiq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13365
// Matches ([src] + -1) ^ [src] on longs where both loads use the same
// memory operand, and emits a single blsmskq with a memory source.
// Requires BMI1 and AVX support.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ blsmskq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13381
// Matches (src + -1) ^ src on a long register and emits a single blsmskq.
// Requires BMI1 and AVX support.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsmskq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13397
// Matches (src + -1) & src on a long register and emits a single blsrq.
// Requires BMI1 and AVX support.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsrq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13413
// Matches ([src] + -1) & [src] on longs where both loads use the same
// memory operand, and emits a single blsrq with a memory source.
// Requires BMI1 and AVX support.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // Memory-source form: use the register/memory pipeline class, as the other
  // memory-source BMI1 long instructs (blsiq, blsmskq) do. The previous
  // ialu_reg class did not account for the load.
  ins_pipe(ialu_reg_mem);
%}
13430
13431 // Or Instructions
// OrL of two registers, two-operand form (dst |= src).
// Selected only when UseAPX is off; the flags register operand is killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ orq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13446
// OrL of two registers, three-operand New Data Destination (NDD) form
// (dst = src1 | src2). Selected only when UseAPX is on.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ eorq(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13462
// OrL of a long register with a pointer reinterpreted via CastP2X
// (dst |= src). The pointer operand is any_RegP so that R15 (the TLS
// register) can be matched directly without spilling.
// Selected only when UseAPX is off.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rptr = $src$$Register;
    __ orq(Rdst, Rptr);
  %}
  ins_pipe(ialu_reg_reg);
%}
13476
// NDD form of OrL with a CastP2X operand (dst = src1 | src2).
// Selected only when UseAPX is on.
// NOTE(review): src1 is declared any_RegP although it appears as a direct
// OrL input; the non-NDD sibling orL_rReg_castP2X uses rRegL for that
// position. Confirm this operand class is intended.
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 (CastP2X src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rptr = $src2$$Register;
    __ eorq(Rdst, Rlhs, Rptr, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13489
// OrL of a register with an immL32 immediate, two-operand form
// (dst |= src). Selected only when UseAPX is off.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13504
// OrL of a register with an immL32 immediate, three-operand NDD form
// (dst = src1 | src2). Selected only when UseAPX is on.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ eorq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13518
// NDD OrL with the immediate as the first ideal input (src1) and the
// register as the second (src2). The encoding passes the register first
// and the constant second. Selected only when UseAPX is on.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src2$$Register;
    __ eorq(Rdst, Rsrc, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13532
// OrL of a register with a long loaded from memory (dst |= [src]).
// The LoadL is folded into the instruction's memory operand.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13547
// Long read-modify-write: LoadL / OrL / StoreL on the same address is
// matched as a single orq with a memory destination.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13562
// Long read-modify-write with an immL32 immediate: LoadL / OrL / StoreL on
// the same address is matched as a single orq [mem], imm.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13577
// Long read-modify-write OrL whose constant is a power of two (immL_Pow2):
// emitted as btsq on the memory operand, with the bit index computed as
// log2 of the constant.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  // Restrict this rule to constants whose set bit is above bit 31, i.e. a
  // genuinely 64-bit power-of-2 immediate; the plain AND/OR forms work well
  // enough for 8/32-bit values.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btsq $dst, log2($con)\t# long" %}
  ins_encode %{
    const int bit_index = log2i_exact((julong)$con$$constant);
    __ btsq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13594
13595 // Xor Instructions
// XorL of two registers, two-operand form (dst ^= src).
// Selected only when UseAPX is off; the flags register operand is killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13610
// Long XOR, register with register, APX New Data Destination (NDD) form:
// dst = src1 ^ src2 with a separate destination register.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs    = $src1$$Register;
    Register rhs    = $src2$$Register;
    __ exorq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13625
// Long XOR with the constant -1 is a bitwise complement: emit notq.
// This rule declares no flags-register effect. Non-APX form.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  predicate(!UseAPX);
  match(Set dst (XorL dst imm));

  format %{ "notq $dst" %}
  ins_encode %{
    Register target = $dst$$Register;
    __ notq(target);
  %}
  ins_pipe(ialu_reg);
%}
13638
// Long XOR with the constant -1 in APX NDD form: dst = ~src via enotq.
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm) %{
  predicate(UseAPX);
  match(Set dst (XorL src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    __ enotq(result, input);
  %}
  ins_pipe(ialu_reg);
%}
13651
13652 // Xor Register with Immediate
13653 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13654 %{
13655 // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
13656 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
13657 match(Set dst (XorL dst src));
13658 effect(KILL cr);
13659 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13660
13661 format %{ "xorq $dst, $src\t# long" %}
13662 ins_encode %{
13663 __ xorq($dst$$Register, $src$$constant);
13664 %}
13665 ins_pipe(ialu_reg);
13666 %}
13667
13668 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13669 %{
13670 // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
13671 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
13672 match(Set dst (XorL src1 src2));
13673 effect(KILL cr);
13674 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13675
13676 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
13677 ins_encode %{
13678 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13679 %}
13680 ins_pipe(ialu_reg);
13681 %}
13682
13683 // Xor Register with Memory
13684 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13685 %{
13686 match(Set dst (XorL dst (LoadL src)));
13687 effect(KILL cr);
13688 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13689
13690 ins_cost(150);
13691 format %{ "xorq $dst, $src\t# long" %}
13692 ins_encode %{
13693 __ xorq($dst$$Register, $src$$Address);
13694 %}
13695 ins_pipe(ialu_reg_mem);
13696 %}
13697
13698 // Xor Memory with Register
13699 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13700 %{
13701 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
13702 effect(KILL cr);
13703 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13704
13705 ins_cost(150);
13706 format %{ "xorq $dst, $src\t# long" %}
13707 ins_encode %{
13708 __ xorq($dst$$Address, $src$$Register);
13709 %}
13710 ins_pipe(ialu_mem_reg);
13711 %}
13712
13713 // Xor Memory with Immediate
13714 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13715 %{
13716 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
13717 effect(KILL cr);
13718 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13719
13720 ins_cost(125);
13721 format %{ "xorq $dst, $src\t# long" %}
13722 ins_encode %{
13723 __ xorq($dst$$Address, $src$$constant);
13724 %}
13725 ins_pipe(ialu_mem_imm);
13726 %}
13727
// Build the CmpLTMask value in dst: compare p with q, set dst on signed
// less-than, then negate dst. Three instructions are emitted (the setcc
// macro may itself expand as noted in the format text) and the flags
// register is clobbered.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Every line but the last ends in "\n\t" so that each emitted instruction
  // is printed on its own line.
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13744
// CmpLTMask against the constant zero: an arithmetic right shift by 31
// replaces dst with copies of its own sign bit.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ sarl(value, 31);
  %}
  ins_pipe(ialu_reg);
%}
13757
// Conditional add: p = (p - q) + ((p < q) ? y : 0), done with a subtract
// and a short forward branch around the add. The branch is preferred over
// a branch-free sequence because it saves a register.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
            "jge done\n\t"
            "addl $p,$y\n"
            "done: " %}
  ins_encode %{
    Label skip_add;
    __ subl($p$$Register, $q$$Register);
    // p >= q: the mask would be zero, so nothing is added.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl($p$$Register, $y$$Register);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
13780
// Masked select: y = (p < q) ? y : 0, done with a compare and a short
// forward branch around the clearing xor. The branch is preferred over a
// branch-free sequence because it saves a register.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jlt done\n\t"
            "xorl $y, $y\n"
            "done: " %}
  ins_encode %{
    Label keep_y;
    __ cmpl($p$$Register, $q$$Register);
    // p < q: y is kept unchanged.
    __ jccb(Assembler::less, keep_y);
    __ xorl($y$$Register, $y$$Register);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
13805
13806
13807 //---------- FP Instructions------------------------------------------------
13808
// Float compare into the unsigned flags register, followed by the generic
// flag fixup sequence (see the format text: on parity, i.e. NaN, the saved
// flags are masked on the stack). High cost: avoid when possible.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
13827
// Float register/register compare producing an rFlagsRegUCF result with a
// bare ucomiss (no fixup sequence).
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13838
// Float register/register compare producing an rFlagsRegUCFE result with
// evucomxss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evucomxss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13849
// Float compare of a register against a float loaded from memory,
// producing an rFlagsRegUCF result with a bare ucomiss.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13860
// Float compare of a register against a float loaded from memory,
// producing an rFlagsRegUCFE result with evucomxss.
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13871
// Float compare of a register against a float constant read from the
// constant table, producing an rFlagsRegUCF result with a bare ucomiss.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13882
// Float compare of a register against a float constant read from the
// constant table, producing an rFlagsRegUCFE result with evucomxss.
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13893
// Double compare into the unsigned flags register, followed by the generic
// flag fixup sequence (see the format text: on parity, i.e. NaN, the saved
// flags are masked on the stack). High cost: avoid when possible.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
13912
// Double register/register compare producing an rFlagsRegUCF result with a
// bare ucomisd (no fixup sequence). The format text mirrors the float
// counterpart cmpF_cc_regCF: just the mnemonic and its two operands.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
13923
// Double register/register compare producing an rFlagsRegUCFE result with
// evucomxsd. The format text mirrors the float counterpart cmpF_cc_regCFE:
// just the mnemonic and its two operands.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
13934
// Double compare of a register against a double loaded from memory,
// producing an rFlagsRegUCF result with a bare ucomisd.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13945
// Double compare of a register against a double loaded from memory,
// producing an rFlagsRegUCFE result with evucomxsd.
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxsd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13956
// Double compare of a register against a double constant read from the
// constant table, producing an rFlagsRegUCF result with a bare ucomisd.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13966
// Double compare of a register against a double constant read from the
// constant table, producing an rFlagsRegUCFE result with evucomxsd.
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxsd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13977
// Three-way float compare (CmpF3) of two registers: dst receives -1, 0 or 1.
// ucomiss sets the flags and emit_cmpfp3 turns them into the integer result.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13998
// Three-way float compare (CmpF3) of a register against a float loaded from
// memory: dst receives -1, 0 or 1.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14019
// Three-way float compare (CmpF3) of a register against a float constant
// read from the constant table: dst receives -1, 0 or 1.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14039
// Three-way double compare (CmpD3) of two registers: dst receives -1, 0 or 1.
// ucomisd sets the flags and emit_cmpfp3 turns them into the integer result.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14060
// Three-way double compare (CmpD3) of a register against a double loaded
// from memory: dst receives -1, 0 or 1.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14081
// Three-way double compare (CmpD3) of a register against a double constant
// read from the constant table: dst receives -1, 0 or 1.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14101
14102 //----------Arithmetic Conversion Instructions---------------------------------
14103
// Float to double conversion, register to register (cvtss2sd).
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ cvtss2sd(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14114
// Float to double conversion with the float loaded straight from memory.
// Only selected when AVX is disabled (UseAVX == 0).
instruct convF2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtss2sd(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14126
// Double to float conversion, register to register (cvtsd2ss).
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ cvtsd2ss(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14137
// Double to float conversion with the double loaded straight from memory.
// Only selected when AVX is disabled (UseAVX == 0).
instruct convD2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsd2ss(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14149
// Float to int conversion through the convertF2I macro-assembler helper.
// Used when AVX10.2 is not available; clobbers the flags register.
// XXX memory-operand variants are still to be done.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, result, input);
  %}
  ins_pipe(pipe_slow);
%}
14162
// Float to int conversion as a single evcvttss2sisl; selected when AVX10.2
// is supported. No flags effect is declared.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ evcvttss2sisl(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14173
// Float to int conversion with the float read from memory, as a single
// evcvttss2sisl; selected when AVX10.2 is supported.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ evcvttss2sisl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14184
// Float to long conversion through the convertF2I macro-assembler helper.
// Used when AVX10.2 is not available; clobbers the flags register.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, result, input);
  %}
  ins_pipe(pipe_slow);
%}
14196
// Float to long conversion as a single evcvttss2sisq; selected when AVX10.2
// is supported.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ evcvttss2sisq(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14207
// Float to long conversion with the float read from memory, as a single
// evcvttss2sisq; selected when AVX10.2 is supported.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ evcvttss2sisq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14218
// Double to int conversion through the convertF2I macro-assembler helper.
// Used when AVX10.2 is not available; clobbers the flags register.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, result, input);
  %}
  ins_pipe(pipe_slow);
%}
14230
// Double to int conversion as a single evcvttsd2sisl; selected when AVX10.2
// is supported.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ evcvttsd2sisl(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14241
// Double to int conversion with the double read from memory, as a single
// evcvttsd2sisl; selected when AVX10.2 is supported.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ evcvttsd2sisl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14252
// Double to long conversion through the convertF2I macro-assembler helper.
// Used when AVX10.2 is not available; clobbers the flags register.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, result, input);
  %}
  ins_pipe(pipe_slow);
%}
14264
// Double to long conversion as a single evcvttsd2sisq; selected when AVX10.2
// is supported.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ evcvttsd2sisq(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14275
// Double to long conversion with the double read from memory, as a single
// evcvttsd2sisq; selected when AVX10.2 is supported.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ evcvttsd2sisq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14286
// RoundD: round a double to a long via the round_double macro-assembler
// helper. Needs a scratch long register plus rcx; dst is a TEMP as well,
// and the flags register is clobbered.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    Register    result   = $dst$$Register;
    XMMRegister input    = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    scratch2 = $rcx$$Register;
    __ round_double(result, input, scratch, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
14297
// RoundF: round a float to an int via the round_float macro-assembler
// helper. Needs a scratch long register plus rcx; dst is a TEMP as well,
// and the flags register is clobbered.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    result   = $dst$$Register;
    XMMRegister input    = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    scratch2 = $rcx$$Register;
    __ round_float(result, input, scratch, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
14308
// Int to float conversion from a general register (cvtsi2ssl).
// Used when UseXmmI2F is off.
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed before the conversion.
    if (UseAVX > 0) {
      __ pxor(result, result);
    }
    __ cvtsi2ssl(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14323
// Int to float conversion with the int loaded straight from memory.
// Only selected when AVX is disabled (UseAVX == 0).
instruct convI2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2ssl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14335
// Int to double conversion from a general register (cvtsi2sdl).
// Used when UseXmmI2D is off.
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed before the conversion.
    if (UseAVX > 0) {
      __ pxor(result, result);
    }
    __ cvtsi2sdl(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14350
// Int to double conversion with the int loaded straight from memory.
// Only selected when AVX is disabled (UseAVX == 0).
instruct convI2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2sdl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14362
// Int to float conversion done inside the XMM unit: move the int into the
// XMM register, then convert in place with cvtdq2ps. Used when UseXmmI2F
// is on.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ movdl(result, $src$$Register);
    __ cvtdq2ps(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14376
// Int to double conversion done inside the XMM unit: move the int into the
// XMM register, then convert in place with cvtdq2pd. Used when UseXmmI2D
// is on.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ movdl(result, $src$$Register);
    __ cvtdq2pd(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14390
// Long to float conversion from a general register (cvtsi2ssq).
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed before the conversion.
    if (UseAVX > 0) {
      __ pxor(result, result);
    }
    __ cvtsi2ssq(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14404
// Long to float conversion with the long loaded straight from memory.
// Only selected when AVX is disabled (UseAVX == 0).
instruct convL2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2ssq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14416
// Long to double conversion from a general register (cvtsi2sdq).
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    // With AVX enabled the destination is zeroed before the conversion.
    if (UseAVX > 0) {
      __ pxor(result, result);
    }
    __ cvtsi2sdq(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14430
// Long to double conversion with the long loaded straight from memory.
// Only selected when AVX is disabled (UseAVX == 0).
instruct convL2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2sdq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14442
// Int to long conversion by sign extension (movslq).
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq $dst, $src\t# i2l" %}
  ins_encode %{
    Register wide   = $dst$$Register;
    Register narrow = $src$$Register;
    __ movslq(wide, narrow);
  %}
  ins_pipe(ialu_reg_reg);
%}
14454
// Zero-extending int to long conversion: matches ConvI2L followed by an AND
// with the 32-bit mask operand and emits a 32-bit register move. The move
// is skipped when source and destination are the same register.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  // Single-line format: no trailing line break, nothing follows the move.
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
14468
// Zero-extending int to long conversion with the int loaded from memory:
// matches ConvI2L(LoadI) followed by an AND with the 32-bit mask operand
// and emits a single 32-bit load.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // Single-line format: no trailing line break, nothing follows the load.
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14480
// AND of a long with the 32-bit mask operand, implemented as a 32-bit
// register move.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    __ movl(result, input);
  %}
  ins_pipe(ialu_reg_reg);
%}
14491
// Long to int conversion, implemented as a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl $dst, $src\t# l2i" %}
  ins_encode %{
    Register narrow = $dst$$Register;
    Register wide   = $src$$Register;
    __ movl(narrow, wide);
  %}
  ins_pipe(ialu_reg_reg);
%}
14502
14503
// MoveF2I: load the raw bits of a float stack slot into an int register.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
14515
// MoveI2F: load the raw bits of an int stack slot into a float register.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14527
// MoveD2L: load the raw bits of a double stack slot into a long register.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
14539
// MoveL2D: load the raw bits of a long stack slot into a double register.
// Variant selected when UseXmmLoadAndClearUpper is off.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14552
// MoveL2D: load the raw bits of a long stack slot into a double register.
// Variant selected when UseXmmLoadAndClearUpper is on.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14565
14566
// MoveF2I: store the raw bits of a float register into an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14578
// MoveI2F: store the raw bits of an int register into a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
14590
// MoveD2L: store the raw bits of a double register into a long stack slot.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The trailing tag names this rule so printed assembly identifies it.
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14602
// MoveL2D: store the raw bits of a long register into a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14614
// MoveF2I: copy the raw bits of a float register into an int register.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    bits  = $dst$$Register;
    XMMRegister value = $src$$XMMRegister;
    __ movdl(bits, value);
  %}
  ins_pipe( pipe_slow );
%}
14625
// MoveD2L: copy the raw bits of a double register into a long register.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    bits  = $dst$$Register;
    XMMRegister value = $src$$XMMRegister;
    __ movdq(bits, value);
  %}
  ins_pipe( pipe_slow );
%}
14636
// MoveI2F: copy the raw bits of an int register into a float register.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    Register    bits  = $src$$Register;
    __ movdl(value, bits);
  %}
  ins_pipe( pipe_slow );
%}
14647
// MoveL2D: copy the raw bits of a long register into a double register.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    Register    bits  = $src$$Register;
    __ movdq(value, bits);
  %}
  ins_pipe( pipe_slow );
%}
14658
14659 // Fast clearing of an array
14660 // Small non-constant lenght ClearArray for non-AVX512 targets.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  // Chosen for ClearArray nodes that are not is_large() when UseAVX <= 2.
  predicate((UseAVX <= 2) && !((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  // cnt (rcx) and base (rdi) are consumed; zero (rax) and the flags are
  // killed; tmp is an XMM scratch register.
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The predicate excludes is_large() nodes, so the flag is false here.
    // No opmask temp exists on this path; knoreg fills the last argument.
    const bool is_large = false;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, is_large, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
14719
// ClearArray with a non-constant length, for nodes that are not is_large(),
// on AVX512 targets (UseAVX > 2). Takes an extra kReg temp for clear_mem.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && !((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  // cnt (rcx) and base (rdi) are consumed; zero (rax) and the flags are
  // killed; tmp and ktmp are scratch registers.
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The predicate excludes is_large() nodes, so the flag is false here.
    const bool is_large = false;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, is_large, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
14780
// Large non-constant length ClearArray for non-AVX512 targets.
// Selected when the ClearArray node reports is_large() and UseAVX <= 2.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  // cnt (rcx) and base (rdi) are consumed; zero (rax) and the flags are
  // killed; tmp is an XMM scratch register.
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // The listing is selected by the same flags (UseFastStosb, then
  // UseXMMForObjInit) in every branch; no branch ends with a line break.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The predicate requires is_large(), so the flag is true here.
    // No opmask temp exists on this path; knoreg fills the last argument.
    const bool is_large = true;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, is_large, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
14831
// Large non-constant length ClearArray for AVX512 targets.
// Selected when the ClearArray node reports is_large() and UseAVX > 2.
// Takes an extra kReg temp that is handed to clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  // cnt (rcx) and base (rdi) are consumed; zero (rax) and the flags are
  // killed; tmp and ktmp are scratch registers.
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // The listing is selected by the same flags (UseFastStosb, then
  // UseXMMForObjInit) in every branch; no branch ends with a line break.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The predicate requires is_large(), so the flag is true here.
    const bool is_large = true;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, is_large, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
14882
// Small constant length ClearArray for AVX512 targets.
// The count is an immediate (immL); selected for nodes that are not
// is_large() when MaxVectorSize >= 32 and AVX512VL is supported.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  // base is only read; tmp, zero and ktmp are scratch; flags are killed.
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt" %}
  ins_encode %{
    // Constant-count overload of clear_mem: the length is passed as an
    // immediate rather than in a register.
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
14896
// StrComp with encoding LL on CPUs where supports_avx512vlbw() is false.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are consumed by the stub.
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    s1   = $str1$$Register;
    Register    s2   = $str2$$Register;
    Register    len1 = $cnt1$$Register;
    Register    len2 = $cnt2$$Register;
    Register    res  = $result$$Register;
    XMMRegister vtmp = $tmp1$$XMMRegister;
    // No opmask temp on this path.
    __ string_compare(s1, s2, len1, len2, res, vtmp, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
14912
// StrComp with encoding LL on CPUs where supports_avx512vlbw() is true;
// an additional kReg temp is handed to string_compare.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are consumed by the stub.
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    s1   = $str1$$Register;
    Register    s2   = $str2$$Register;
    Register    len1 = $cnt1$$Register;
    Register    len2 = $cnt2$$Register;
    Register    res  = $result$$Register;
    XMMRegister vtmp = $tmp1$$XMMRegister;
    KRegister   mask = $ktmp$$KRegister;
    __ string_compare(s1, s2, len1, len2, res, vtmp, StrIntrinsicNode::LL, mask);
  %}
  ins_pipe( pipe_slow );
%}
14928
// StrComp with encoding UU on CPUs where supports_avx512vlbw() is false.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are consumed by the stub.
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    s1   = $str1$$Register;
    Register    s2   = $str2$$Register;
    Register    len1 = $cnt1$$Register;
    Register    len2 = $cnt2$$Register;
    Register    res  = $result$$Register;
    XMMRegister vtmp = $tmp1$$XMMRegister;
    // No opmask temp on this path.
    __ string_compare(s1, s2, len1, len2, res, vtmp, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
14944
// StrComp with encoding UU on CPUs where supports_avx512vlbw() is true;
// an additional kReg temp is handed to string_compare.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are consumed by the stub.
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    s1   = $str1$$Register;
    Register    s2   = $str2$$Register;
    Register    len1 = $cnt1$$Register;
    Register    len2 = $cnt2$$Register;
    Register    res  = $result$$Register;
    XMMRegister vtmp = $tmp1$$XMMRegister;
    KRegister   mask = $ktmp$$KRegister;
    __ string_compare(s1, s2, len1, len2, res, vtmp, StrIntrinsicNode::UU, mask);
  %}
  ins_pipe( pipe_slow );
%}
14960
// StrComp with encoding LU on CPUs where supports_avx512vlbw() is false.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are consumed by the stub.
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    s1   = $str1$$Register;
    Register    s2   = $str2$$Register;
    Register    len1 = $cnt1$$Register;
    Register    len2 = $cnt2$$Register;
    Register    res  = $result$$Register;
    XMMRegister vtmp = $tmp1$$XMMRegister;
    // No opmask temp on this path.
    __ string_compare(s1, s2, len1, len2, res, vtmp, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
14976
// StrComp with encoding LU on CPUs where supports_avx512vlbw() is true;
// an additional kReg temp is handed to string_compare.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are consumed by the stub.
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    s1   = $str1$$Register;
    Register    s2   = $str2$$Register;
    Register    len1 = $cnt1$$Register;
    Register    len2 = $cnt2$$Register;
    Register    res  = $result$$Register;
    XMMRegister vtmp = $tmp1$$XMMRegister;
    KRegister   mask = $ktmp$$KRegister;
    __ string_compare(s1, s2, len1, len2, res, vtmp, StrIntrinsicNode::LU, mask);
  %}
  ins_pipe( pipe_slow );
%}
14992
// StrComp with encoding UL on CPUs where supports_avx512vlbw() is false.
// Here str1/cnt1 live in rsi/rdx and str2/cnt2 in rdi/rcx, and the stub
// is called with the second string first.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are consumed by the stub.
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    first      = $str2$$Register;
    Register    second     = $str1$$Register;
    Register    first_len  = $cnt2$$Register;
    Register    second_len = $cnt1$$Register;
    Register    res        = $result$$Register;
    XMMRegister vtmp       = $tmp1$$XMMRegister;
    // Swapped argument order: (str2, str1, cnt2, cnt1). No opmask temp.
    __ string_compare(first, second, first_len, second_len, res, vtmp,
                      StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15008
// StrComp with encoding UL on CPUs where supports_avx512vlbw() is true.
// Here str1/cnt1 live in rsi/rdx and str2/cnt2 in rdi/rcx, and the stub
// is called with the second string first. A kReg temp is passed as well.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Both string pointers and both counts are consumed by the stub.
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    first      = $str2$$Register;
    Register    second     = $str1$$Register;
    Register    first_len  = $cnt2$$Register;
    Register    second_len = $cnt1$$Register;
    Register    res        = $result$$Register;
    XMMRegister vtmp       = $tmp1$$XMMRegister;
    KRegister   mask       = $ktmp$$KRegister;
    // Swapped argument order: (str2, str1, cnt2, cnt1).
    __ string_compare(first, second, first_len, second_len, res, vtmp,
                      StrIntrinsicNode::UL, mask);
  %}
  ins_pipe( pipe_slow );
%}
15024
// StrIndexOf (encoding LL) where the substring length is a compile-time
// constant. Requires UseSSE42Intrinsics.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  // cnt2 (rax) is not an input here; it is only clobbered by the stub.
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 < 16) {
      // Short substrings: loaded through the stack if they cross a page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Constant substrings of 16 or more elements need no stack load.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15053
// StrIndexOf (encoding UU) where the substring length is a compile-time
// constant. Requires UseSSE42Intrinsics.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  // cnt2 (rax) is not an input here; it is only clobbered by the stub.
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 < 8) {
      // Short substrings: loaded through the stack if they cross a page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Constant substrings of 8 or more elements need no stack load.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
15082
// StrIndexOf (encoding UL) where the substring length is a compile-time
// constant. Requires UseSSE42Intrinsics.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  // cnt2 (rax) is not an input here; it is only clobbered by the stub.
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 < 8) {
      // Short substrings: loaded through the stack if they cross a page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Constant substrings of 8 or more elements need no stack load.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15111
// StrIndexOf (encoding LL) with the substring length in a register (rax).
// Requires UseSSE42Intrinsics.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    // -1 goes in the slot that takes the constant substring length in the
    // constant-length forms of this instruct.
    const int icnt2 = -1;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      icnt2, $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
15128
// StrIndexOf (encoding UU) with the substring length in a register (rax).
// Requires UseSSE42Intrinsics.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    // -1 goes in the slot that takes the constant substring length in the
    // constant-length forms of this instruct.
    const int icnt2 = -1;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      icnt2, $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
15145
// StrIndexOf (encoding UL) with the substring length in a register (rax).
// Requires UseSSE42Intrinsics.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    // -1 goes in the slot that takes the constant substring length in the
    // constant-length forms of this instruct.
    const int icnt2 = -1;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      icnt2, $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
15162
// StrIndexOfChar for encoding U. Requires UseSSE42Intrinsics.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  // Three XMM temps plus rcx are scratch; str1, cnt1 and ch are consumed.
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    XMMRegister vec1 = $tmp_vec1$$XMMRegister;
    XMMRegister vec2 = $tmp_vec2$$XMMRegister;
    XMMRegister vec3 = $tmp_vec3$$XMMRegister;
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           vec1, vec2, vec3, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15176
// StrIndexOfChar for encoding L. Requires UseSSE42Intrinsics.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  // Three XMM temps plus rcx are scratch; str1, cnt1 and ch are consumed.
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    XMMRegister vec1 = $tmp_vec1$$XMMRegister;
    XMMRegister vec2 = $tmp_vec2$$XMMRegister;
    XMMRegister vec3 = $tmp_vec3$$XMMRegister;
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            vec1, vec2, vec3, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15190
// StrEquals on CPUs where supports_avx512vlbw() is false.
// Implemented through the shared arrays_equals stub.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    XMMRegister vtmp1 = $tmp1$$XMMRegister;
    XMMRegister vtmp2 = $tmp2$$XMMRegister;
    // First flag is false here and true in the array_equals* instructs;
    // the length comes from cnt. No opmask temp on this path.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     vtmp1, vtmp2, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15207
// StrEquals on CPUs where supports_avx512vlbw() is true.
// Implemented through the shared arrays_equals stub with a kReg temp.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    XMMRegister vtmp1 = $tmp1$$XMMRegister;
    XMMRegister vtmp2 = $tmp2$$XMMRegister;
    KRegister   mask  = $ktmp$$KRegister;
    // First flag is false here and true in the array_equals* instructs;
    // the length comes from cnt.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     vtmp1, vtmp2, false /* char */, mask);
  %}
  ins_pipe( pipe_slow );
%}
15223
// AryEq with encoding LL on CPUs where supports_avx512vlbw() is false.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    XMMRegister vtmp1 = $tmp1$$XMMRegister;
    XMMRegister vtmp2 = $tmp2$$XMMRegister;
    // First flag is true for the array form; tmp3 (rcx) takes the slot
    // that string_equals fills with cnt. No opmask temp on this path.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     vtmp1, vtmp2, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15240
// AryEq with encoding LL on CPUs where supports_avx512vlbw() is true;
// an additional kReg temp is handed to arrays_equals.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    XMMRegister vtmp1 = $tmp1$$XMMRegister;
    XMMRegister vtmp2 = $tmp2$$XMMRegister;
    KRegister   mask  = $ktmp$$KRegister;
    // First flag is true for the array form; tmp3 (rcx) takes the slot
    // that string_equals fills with cnt.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     vtmp1, vtmp2, false /* char */, mask);
  %}
  ins_pipe( pipe_slow );
%}
15256
// AryEq with encoding UU on CPUs where supports_avx512vlbw() is false.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    XMMRegister vtmp1 = $tmp1$$XMMRegister;
    XMMRegister vtmp2 = $tmp2$$XMMRegister;
    // First flag is true for the array form and the char flag is set for
    // UU. No opmask temp on this path.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     vtmp1, vtmp2, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15272
// AryEq with encoding UU on CPUs where supports_avx512vlbw() is true;
// an additional kReg temp is handed to arrays_equals.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    XMMRegister vtmp1 = $tmp1$$XMMRegister;
    XMMRegister vtmp2 = $tmp2$$XMMRegister;
    KRegister   mask  = $ktmp$$KRegister;
    // First flag is true for the array form and the char flag is set for UU.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     vtmp1, vtmp2, true /* char */, mask);
  %}
  ins_pipe( pipe_slow );
%}
15288
// VectorizedHashCode, available when UseAVX >= 2. result (rbx) is both an
// input of the match and the destination; basic_type is an immediate that
// is cast to BasicType for the stub.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  // Thirteen XMM temps and three integer temps are scratch; ary1 and cnt1
  // are consumed.
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    const BasicType elem_type = (BasicType)$basic_type$$constant;
    __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                       $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
                       $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, elem_type);
  %}
  ins_pipe( pipe_slow );
%}
15314
// CountPositives for CPUs lacking either AVX512VL+BW or BMI2.
// ary1 (rsi) and len (rcx) are consumed; the result is produced in rax.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  // Two XMM temps are scratch; tmp3 (rbx) and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // No opmask temps on this path: knoreg fills both KRegister slots.
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15330
// CountPositives for CPUs that support both AVX512VL+BW and BMI2.
// ary1 (rsi) and len (rcx) are consumed; the result is produced in rax.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  // Two XMM temps and two kReg temps are scratch; tmp3 (rbx) and the
  // flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15346
// char[] to byte[] compression (StrCompressedCopy) for CPUs lacking either
// AVX512VL+BW or BMI2.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  // src, dst and len are consumed; tmp5 (rcx) and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    XMMRegister vtmp1 = $tmp1$$XMMRegister;
    XMMRegister vtmp2 = $tmp2$$XMMRegister;
    XMMRegister vtmp3 = $tmp3$$XMMRegister;
    XMMRegister vtmp4 = $tmp4$$XMMRegister;
    // No opmask temps on this path: knoreg fills both KRegister slots.
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           vtmp1, vtmp2, vtmp3, vtmp4,
                           $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15364
// char[] to byte[] compression (StrCompressedCopy) for CPUs that support
// both AVX512VL+BW and BMI2; two kReg temps are handed to the stub.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  // src, dst and len are consumed; tmp5 (rcx) and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    XMMRegister vtmp1 = $tmp1$$XMMRegister;
    XMMRegister vtmp2 = $tmp2$$XMMRegister;
    XMMRegister vtmp3 = $tmp3$$XMMRegister;
    XMMRegister vtmp4 = $tmp4$$XMMRegister;
    KRegister   mask1 = $ktmp1$$KRegister;
    KRegister   mask2 = $ktmp2$$KRegister;
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           vtmp1, vtmp2, vtmp3, vtmp4,
                           $tmp5$$Register, $result$$Register,
                           mask1, mask2);
  %}
  ins_pipe( pipe_slow );
%}
// Fast byte[] to char[] inflation, non-EVEX flavour: chosen whenever the CPU
// lacks AVX512VL/BW or BMI2, so no opmask register is passed (knoreg).
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()));
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    Register bytes = $src$$Register;
    Register chars = $dst$$Register;
    Register count = $len$$Register;
    __ byte_array_inflate(bytes, chars, count,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15395
// Fast byte[] to char[] inflation, EVEX flavour: requires AVX512VL/BW and BMI2
// and hands one opmask temporary to the stub.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    Register bytes = $src$$Register;
    Register chars = $dst$$Register;
    Register count = $len$$Register;
    __ byte_array_inflate(bytes, chars, count,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15409
// Encode char[] to byte[] as ISO_8859_1. Selected when the EncodeISOArray node
// is not the ASCII variant; the shared stub is called with ascii == false.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    Register chars = $src$$Register;
    Register bytes = $dst$$Register;
    Register count = $len$$Register;
    __ encode_iso_array(chars, bytes, count,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe(pipe_slow);
%}
15426
// Encode char[] to byte[] as ASCII. Selected when the EncodeISOArray node is
// the ASCII variant; the shared stub is called with ascii == true.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    Register chars = $src$$Register;
    Register bytes = $dst$$Register;
    Register count = $len$$Register;
    __ encode_iso_array(chars, bytes, count,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe(pipe_slow);
%}
15443
15444 //----------Overflow Math Instructions-----------------------------------------
15445
// Int add overflow check (reg, reg): the add is done in place on $op1, which
// is therefore USE_KILLed; only the resulting flags are the product.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register acc    = $op1$$Register;
    Register addend = $op2$$Register;
    __ addl(acc, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
15458
// Int add overflow check (reg, immediate): the add is done in place on $op1,
// which is therefore USE_KILLed; only the resulting flags are the product.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register acc = $op1$$Register;
    __ addl(acc, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15471
// Long add overflow check (reg, reg): the add is done in place on $op1, which
// is therefore USE_KILLed; only the resulting flags are the product.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register acc    = $op1$$Register;
    Register addend = $op2$$Register;
    __ addq(acc, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
15483
// Long add overflow check (reg, 32-bit immediate): the add is done in place on
// $op1, which is therefore USE_KILLed; only the flags are the product.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register acc = $op1$$Register;
    __ addq(acc, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15495
// Int subtract overflow check (reg, reg): a compare produces the flags
// without modifying either operand.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register minuend    = $op1$$Register;
    Register subtrahend = $op2$$Register;
    __ cmpl(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
15506
// Int subtract overflow check (reg, immediate): a compare produces the flags
// without modifying the register operand.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register minuend = $op1$$Register;
    __ cmpl(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15517
// Long subtract overflow check (reg, reg): a compare produces the flags
// without modifying either operand.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register minuend    = $op1$$Register;
    Register subtrahend = $op2$$Register;
    __ cmpq(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
15528
// Long subtract overflow check (reg, 32-bit immediate): a compare produces the
// flags without modifying the register operand.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register minuend = $op1$$Register;
    __ cmpq(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15539
// Int negate overflow check: matches (0 - $op2) and negates $op2 in place, so
// the operand is USE_KILLed and only the flags are the product.
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    Register value = $op2$$Register;
    __ negl(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
15551
// Long negate overflow check: matches (0 - $op2) and negates $op2 in place, so
// the operand is USE_KILLed and only the flags are the product.
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    Register value = $op2$$Register;
    __ negq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
15563
// Int multiply overflow check (reg, reg): the multiply is done in place on
// $op1, which is therefore USE_KILLed; only the flags are the product.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register acc    = $op1$$Register;
    Register factor = $op2$$Register;
    __ imull(acc, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15575
// Int multiply overflow check (reg, immediate): the three-operand multiply
// writes into the TEMP $tmp, leaving $op1 intact.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register scratch = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imull(scratch, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15587
// Long multiply overflow check (reg, reg): the multiply is done in place on
// $op1, which is therefore USE_KILLed; only the flags are the product.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register acc    = $op1$$Register;
    Register factor = $op2$$Register;
    __ imulq(acc, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15599
// Long multiply overflow check (reg, 32-bit immediate): the three-operand
// multiply writes into the TEMP $tmp, leaving $op1 intact.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register scratch = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imulq(scratch, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
15611
15612
15613 //----------Control Flow Instructions------------------------------------------
15614 // Signed compare Instructions
15615
15616 // XXX more variants!!
// Signed int compare, register against register.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15628
// Signed int compare, register against immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15639
// Signed int compare, register against an int loaded from memory (the LoadI
// is folded into the compare's memory operand).
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Address  rhs = $op2$$Address;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15651
// Int compare against zero, done by testing the register with itself.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15662
// ((src & con) compared with 0) folded into one test against an immediate.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15673
// ((src1 & src2) compared with 0) folded into one register/register test.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    __ testl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15684
// ((src & LoadI(mem)) compared with 0) folded into one register/memory test.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    Address  where = $mem$$Address;
    __ testl(value, where);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15695
// Unsigned compare Instructions. The emitted code is the same as for the
// signed compares; the difference is that the result is typed rFlagsRegU
// rather than rFlagsReg.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15708
// Unsigned int compare, register against immediate.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15719
// Unsigned int compare, register against an int loaded from memory.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Address  rhs = $op2$$Address;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15731
// Unsigned int compare against zero, done by testing the register with itself.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15742
// Pointer compare, register against register (64-bit, unsigned flags).
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15753
// Pointer compare, register against a pointer loaded from memory. Only
// applies when the folded LoadP carries no GC barrier data.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Address  rhs = $op2$$Address;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15766
// XXX this is generalized by compP_rReg_mem???
// Raw pointer compare against memory (used in the out-of-heap check).
// This is only valid because non-oop pointers must be raw pointers, and raw
// pointers have no anti-dependencies. Also requires a barrier-free LoadP.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Address  rhs = $op2$$Address;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15783
// Pointer null check on a register. The result is a signed flags value,
// which should be fine because a compare against zero is only ever eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    Register ptr = $src$$Register;
    __ testq(ptr, ptr);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15796
// Pointer null check directly on memory. The result is a signed flags value,
// which should be fine because a compare against zero is only ever eq/neq.
// Used when compressed oops are off or the heap base is non-null, and the
// folded LoadP carries no GC barrier data.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    Address slot = $op$$Address;
    __ testq(slot, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15812
// Pointer null check on memory for compressed oops with a null heap base:
// compares the slot against R12 (noted as R12_heapbase==0 in the format).
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmpq(r12, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15825
// Compressed pointer compare, register against register (32-bit).
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15834
// Compressed pointer compare, register against a narrow oop loaded from
// memory. Only applies when the folded LoadN carries no GC barrier data.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    Address  slot   = $mem$$Address;
    __ cmpl(narrow, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15846
// Compressed pointer compare, register against a narrow oop constant.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $op1$$Register;
    __ cmp_narrow_oop(narrow, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15856
// Compressed pointer compare, memory slot against a narrow oop constant.
// Only applies when the folded LoadN carries no GC barrier data.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmp_narrow_oop(slot, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15868
// Compressed klass pointer compare, register against a narrow klass constant.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    Register narrow = $op1$$Register;
    __ cmp_narrow_klass(narrow, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15878
// Compressed klass pointer compare, memory slot against a narrow klass
// constant. Disabled when UseCompactObjectHeaders is set.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmp_narrow_klass(slot, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15890
// Compressed pointer null check on a register (test the register with itself).
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    __ testl(narrow, narrow);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15898
// Compressed pointer null check directly on memory, used when the compressed
// oops heap base is non-null and the folded LoadN carries no GC barrier data.
//
// The rule matches (CmpN (LoadN mem) zero), so the flags must reflect a
// comparison of the loaded narrow oop with 0. TEST mem, 0xFFFFFFFF ANDs the
// value with all ones and therefore sets ZF exactly when the value is zero.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    // Must be TEST, not CMP: "cmpl mem, -1" would set ZF only when the slot
    // holds 0xFFFFFFFF, inverting the meaning of the null check.
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15912
// Compressed pointer null check on memory when the heap base is null:
// compares the slot against R12 (noted as R12_heapbase==0 in the format).
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmpl(r12, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15925
15926 // Yanked all unsigned pointer compare operations.
15927 // Pointer compares are done with CmpP which is already unsigned.
15928
// Signed long compare, register against register.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
15939
// Signed long compare, register against a 32-bit immediate.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15950
// Signed long compare, register against a long loaded from memory.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Address  rhs = $op2$$Address;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
15961
// Long compare against zero, done by testing the register with itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15972
// ((src & con) compared with 0L) folded into one test against a 32-bit
// immediate.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15983
// ((src1 & src2) compared with 0L) folded into one register/register test.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    __ testq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
15994
// ((src & LoadL(mem)) compared with 0L) folded into one register/memory test.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    Address  where = $mem$$Address;
    __ testq(value, where);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16005
// Same as testL_reg_mem, but the register operand is a pointer reinterpreted
// as a long (CastP2X), so no separate cast instruction is needed.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    Address  where = $mem$$Address;
    __ testq(value, where);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16016
// Manifest a CmpU result in an integer register. Very painful.
// This is the test to avoid.
//
// Sequence: compare unsigned, preload $dst with -1, and keep it if src1 is
// below src2; otherwise setcc(notZero) replaces it based on the flags of the
// compare (movl does not alter flags).
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The tag names this node (CmpU3) and "done:" is put on its own line so the
  // printed listing does not run the label into the preceding comment.
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16040
// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
//
// Sequence: compare signed, preload $dst with -1, and keep it if src1 is less
// than src2; otherwise setcc(notZero) replaces it based on the flags of the
// compare (movl does not alter flags).
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // "done:" is put on its own line so the printed listing does not run the
  // label into the preceding comment.
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16064
// Manifest a CmpUL result in an integer register. Very painful.
// This is the test to avoid.
//
// Sequence: compare unsigned, preload $dst with -1, and keep it if src1 is
// below src2; otherwise setcc(notZero) replaces it based on the flags of the
// compare (movl does not alter flags).
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The tag names this node (CmpUL3) and "done:" is put on its own line so the
  // printed listing does not run the label into the preceding comment.
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16088
// Unsigned long compare Instructions. The emitted code is the same as for the
// signed long compares; the difference is that the result is typed rFlagsRegU
// rather than rFlagsReg.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16101
// Unsigned long compare, register against a 32-bit immediate.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16112
// Unsigned long compare, register against a long loaded from memory.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Address  rhs = $op2$$Address;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16123
// Unsigned long compare against zero, done by testing the register with
// itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16134
// Compare a signed byte in memory with an 8-bit immediate, folding the LoadB
// into a byte-sized compare.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmpb(slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16144
// ((LoadUB(mem) & imm) compared with 0) folded into a byte-sized test; the
// mask operand is an immU7.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ testb(slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16154
// ((LoadB(mem) & imm) compared with 0) folded into a byte-sized test; the
// mask operand is an immI8.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ testb(slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16164
16165 //----------Max and Min--------------------------------------------------------
16166 // Min Instructions
16167
// Helper with no match rule (used from the minI_rReg expand): conditionally
// moves $src into $dst when the preceding compare found "greater". Non-APX.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    Register result    = $dst$$Register;
    Register candidate = $src$$Register;
    __ cmovl(Assembler::greater, result, candidate);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16179
// Helper with no match rule (used from the minI_rReg_ndd expand): APX
// three-operand conditional move on "greater" with a separate destination.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register first  = $src1$$Register;
    Register second = $src2$$Register;
    __ ecmovl(Assembler::greater, result, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16191
// Int minimum (non-APX): expands to a compare of dst with src followed by a
// conditional move on "greater", linked through a fresh flags operand.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg flags;
    compI_rReg(flags, dst, src);
    cmovI_reg_g(dst, src, flags);
  %}
%}
16204
// Int minimum (APX): expands to a compare of src1 with src2 followed by the
// three-operand conditional move on "greater", linked through a fresh flags
// operand.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg flags;
    compI_rReg(flags, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, flags);
  %}
%}
16219
// Helper with no match rule (used from the maxI_rReg expand): conditionally
// moves $src into $dst when the preceding compare found "less". Non-APX.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    Register result    = $dst$$Register;
    Register candidate = $src$$Register;
    __ cmovl(Assembler::less, result, candidate);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16231
// Helper with no match rule (used from the maxI_rReg_ndd expand): APX
// three-operand conditional move on "less" with a separate destination.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register first  = $src1$$Register;
    Register second = $src2$$Register;
    __ ecmovl(Assembler::less, result, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16243
// Int maximum (non-APX): expands to a compare of dst with src followed by a
// conditional move on "less", linked through a fresh flags operand.
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg flags;
    compI_rReg(flags, dst, src);
    cmovI_reg_l(dst, src, flags);
  %}
%}
16256
// Int maximum (APX): expands to a compare of src1 with src2 followed by the
// three-operand conditional move on "less", linked through a fresh flags
// operand.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg flags;
    compI_rReg(flags, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, flags);
  %}
%}
16271
16272 // ============================================================================
16273 // Branch Instructions
16274
// Jump Direct - Label defines a relative address from JMP+1.
// Unconditional branch for Goto, declared with a fixed size of 5 bytes.
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp $labl" %}
  size(5);
  ins_encode %{
    Label* target = $labl$$label;
    // false: always emit the long jump form.
    __ jmp(*target, false);
  %}
  ins_pipe(pipe_jmp);
%}
16290
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Signed-flags conditional branch for If, declared with a fixed size of 6 bytes.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    // false: always emit the long jump form.
    __ jcc(cc, *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16306
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Same encoding as jmpCon, but matches the back-branch of a counted loop
// (CountedLoopEnd).
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    // false: always emit the long jump form.
    __ jcc(cc, *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16322
// Jump Direct Conditional - using unsigned comparison.
// Consumes an rFlagsRegU together with a cmpOpU condition.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    // false: always emit the long jump form.
    __ jcc(cc, *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16337
// Conditional jump consuming an rFlagsRegUCF together with a cmpOpUCF
// condition; a single long-form jcc is emitted.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    // false: always emit the long jump form.
    __ jcc(cc, *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16351
// Conditional jump for cmpOpUCF2 conditions, which need the parity flag to be
// examined as well:
//   notEqual: branch to $labl when parity is set OR when not-equal.
//   equal:    skip the branch when parity is set, otherwise branch on equal.
// Any other condition code is a fatal error.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual: {
        // Two long-form branches to the same target.
        __ jcc(Assembler::parity, *target, false);
        __ jcc(Assembler::notEqual, *target, false);
        break;
      }
      case Assembler::equal: {
        // Short hop over the real branch when parity is set.
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jcc(Assembler::equal, *target, false);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16383
// Jump Direct Conditional - using signed and unsigned comparison.
// Consumes an rFlagsRegUCFE together with a cmpOpUCFE condition.
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,su $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    // false: always emit the long jump form.
    __ jcc(cc, *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
16398
16399 // ============================================================================
16400 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16401 // superklass array for an instance of the superklass. Set a hidden
16402 // internal cache on a hit (cache is checked with exposed code in
16403 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16404 // encoding ALSO sets flags.
16405
// Linear-scan flavour of the partial subtype check, used when
// UseSecondarySupersTable is off. On a hit $result is cleared; on a miss
// control reaches the local label with $result left as the helper set it.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  predicate(!UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  ins_cost(1100); // slightly larger than the next version
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Register sub_klass   = $sub$$Register;
    Register super_klass = $super$$Register;
    Register scan_count  = $rcx$$Register;
    Register outcome     = $result$$Register;

    Label not_found;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear(sub_klass, super_klass,
                                            scan_count, outcome,
                                            nullptr, &not_found,
                                            /*set_cond_codes:*/ true);
    // Fall-through is the hit path: report success as zero.
    __ xorptr(outcome, outcome);
    __ bind(not_found);
  %}

  ins_pipe(pipe_slow);
%}
16439
16440 // ============================================================================
16441 // Two versions of hashtable-based partialSubtypeCheck, both used when
16442 // we need to search for a super class in the secondary supers array.
16443 // The first is used when we don't know _a priori_ the class being
16444 // searched for. The second, far more common, is used when we do know:
16445 // this is used for instanceof, checkcast, and any case where C2 can
16446 // determine it by constant propagation.
16447
// PartialSubtypeCheck for a superclass that is only available in a register;
// matched when UseSecondarySupersTable is on. Flags are killed and four
// fixed temporaries (rdx, rcx, rbx, r11) are reserved for the lookup helper.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super" %}
  ins_cost(1000);

  ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register,
                                         $super$$Register,
                                         $temp1$$Register,
                                         $temp2$$Register,
                                         $temp3$$Register,
                                         $temp4$$Register,
                                         $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
16466
// PartialSubtypeCheck for a superclass known as a constant ($super_con, also
// materialised in $super_reg); matched when UseSecondarySupersTable is on.
// The constant's hash slot is computed at code-emission time and selects
// either the inlined lookup or a call to a per-slot stub.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
  ins_cost(700); // cheaper than the variable-super rule (1000)

  ins_encode %{
    u1 slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      // Out-of-line: one stub per hash slot.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(slot)));
    } else {
      __ lookup_secondary_supers_table_const($sub$$Register,
                                             $super_reg$$Register,
                                             $temp1$$Register,
                                             $temp2$$Register,
                                             $temp3$$Register,
                                             $temp4$$Register,
                                             $result$$Register,
                                             slot);
    }
  %}

  ins_pipe(pipe_slow);
%}
16491
16492 // ============================================================================
16493 // Branch Instructions -- short offset versions
16494 //
16495 // These instructions are used to replace jumps of a long offset (the default
16496 // match) with jumps of a shorter offset. These instructions are all tagged
16497 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16498 // match rules in general matching. Instead, the ADLC generates a conversion
16499 // method in the MachNode which can be used to do in-place replacement of the
16500 // long variant with the shorter variant. The compiler will determine if a
16501 // branch can be taken by the is_short_branch_offset() predicate in the machine
16502 // specific code section of the file.
16503
// Unconditional jump (Goto), short-offset replacement variant.
// Tagged ins_short_branch, emitted with jmpb and declared as 2 bytes.
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  size(2);
  format %{ "jmp,s $labl" %}
  ins_cost(300);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jmp);
%}
16519
// Conditional jump (If) on rFlagsReg with a cmpOp condition,
// short-offset replacement variant: a single 2-byte jccb.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  size(2);
  format %{ "j$cop,s $labl" %}
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
16535
// Conditional jump closing a counted loop (CountedLoopEnd),
// short-offset replacement variant: a single 2-byte jccb.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(2);
  format %{ "j$cop,s $labl\t# loop end" %}
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
16551
// Conditional jump (If) for unsigned comparisons (cmpOpU on rFlagsRegU),
// short-offset replacement variant: a single 2-byte jccb.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  format %{ "j$cop,us $labl" %}
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
16567
// Conditional jump (If) for cmpOpUCF conditions on rFlagsRegUCF flags,
// short-offset replacement variant: a single 2-byte jccb.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  format %{ "j$cop,us $labl" %}
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
16582
// Conditional jump (If) for cmpOpUCF2 conditions, which are restricted to
// equal / notEqual, short-offset replacement variant. Both shapes emit
// exactly two jccb instructions (4 bytes): a parity test plus the real test.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(4);
  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    const int cc = $cop$$cmpcode;
    if (cc == Assembler::equal) {
      // With parity set the 'equal' jump is bypassed (presumably the
      // unordered floating-point case - confirm against the compare rules).
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jccb(Assembler::equal, *target);
      __ bind(skip);
    } else if (cc == Assembler::notEqual) {
      // Parity set or not-equal both take the branch.
      __ jccb(Assembler::parity, *target);
      __ jccb(Assembler::notEqual, *target);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
16616
// Conditional jump (If) for cmpOpUCFE conditions on rFlagsRegUCFE flags
// ("signed and unsigned comparison" flavour), short-offset replacement
// variant: a single 2-byte jccb.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  format %{ "j$cop,sus $labl" %}
  ins_cost(300);
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
16632
16633 // ============================================================================
16634 // inlined locking and unlocking
16635
// Inlined monitor enter (FastLock); the outcome is delivered in the flags.
// $box is pinned to rbx and is both read and destroyed; rax and $tmp are
// scratch registers for the macro-assembler helper.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp)
%{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);

  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    __ fast_lock($object$$Register,
                 $box$$Register,
                 $rax_reg$$Register,
                 $tmp$$Register,
                 r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16646
// Inlined monitor exit (FastUnlock); the outcome is delivered in the flags.
// The second FastUnlock input is pinned to rax and is both read and
// destroyed; $tmp is a scratch register for the macro-assembler helper.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp)
%{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);

  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    __ fast_unlock($object$$Register,
                   $rax_reg$$Register,
                   $tmp$$Register,
                   r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16657
16658
16659 // ============================================================================
16660 // Safepoint Instructions
// Safepoint poll: a testl of rax against the word addressed by $poll,
// tagged with a poll_type relocation. Flags are killed.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  ins_cost(125);
  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the poll starts so the emitted bytes can be verified.
    address poll_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(poll_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
16677
// MaskAll from a long source register into an opmask (k) register.
// The vector length of this node is forwarded to the helper.
instruct mask_all_evexL(kReg dst, rRegL src)
%{
  match(Set dst (MaskAll src));

  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, vlen);
  %}
  ins_pipe( pipe_slow );
%}
16687
// MaskAll from an int source for vectors longer than 32 elements.
// The int is first sign-extended into the long temporary, which then
// feeds the same helper as the long-source rule.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp)
%{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP tmp);

  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, vlen);
  %}
  ins_pipe( pipe_slow );
%}
16700
16701 // ============================================================================
16702 // Procedure Call/Return Instructions
// Static Java call.
// Keep ret_addr_offset() and compute_padding() in sync with any change
// to this encoding.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);
  effect(USE meth);

  format %{ "call,static " %}
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_alignment(4);
  ins_pipe(pipe_slow);
%}
16717
// Dynamic (virtual/interface) Java call.
// Keep ret_addr_offset() and compute_padding() in sync with any change
// to this encoding.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  format %{ "movq rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_alignment(4);
  ins_pipe(pipe_slow);
%}
16733
// Call into the runtime (CallRuntime), encoded as clear_avx followed by
// Java_To_Runtime.
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  format %{ "call,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16745
// Leaf runtime call (CallLeaf): runtime call without a safepoint,
// encoded as clear_avx followed by Java_To_Runtime.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  format %{ "call_leaf,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16757
// Leaf runtime call taking vector arguments (CallLeafVector): no safepoint.
// Unlike the other runtime-call rules here, the encoding does not include
// clear_avx.
instruct CallLeafDirectVector(method meth) %{
  match(CallLeafVector);
  effect(USE meth);

  format %{ "call_leaf,vector " %}
  ins_cost(300);
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16769
// Leaf runtime call, no-FP flavour (CallLeafNoFP): no safepoint,
// encoded as clear_avx followed by Java_To_Runtime.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  format %{ "call_leaf_nofp,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16781
// Method return: pop the return address and jump to it.
// NOTE(review): the earlier comment here stated that a nop is always emitted
// after the ret to leave room for safepoint patching; the encoding below
// emits only the ret itself - confirm whether that note is still accurate.
instruct Ret() %{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    // Plain near return, no extra stack bytes released.
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
16796
// Tail call ("interprocedural jump") from a runtime stub into Java code.
// The return address stays on the stack, so the jump target eventually
// returns to our caller; TailJump is the variant that removes it.
// 'jump_target' must not be rbp: a MachEpilogNode emitted just above the
// TailCall has already restored rbp to the caller's state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr) %{
  match(TailCall jump_target method_ptr);

  format %{ "jmp $jump_target\t# rbx holds method" %}
  ins_cost(300);
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
16814
// Tail jump: discard the return address, then jump to the target.
// (TailCall, by contrast, leaves the return address in place.)
// The exception oop operand is pinned to rax.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop) %{
  match(TailJump jump_target ex_oop);

  format %{ "popq rdx\t# pop return address\n\t"
            "jmp $jump_target" %}
  ins_cost(300);
  ins_encode %{
    // The popped return address lands in rdx.
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
16830
// ForwardException: jump to the forward-exception stub entry.
instruct ForwardExceptionjmp() %{
  match(ForwardException);

  format %{ "jmp forward_exception_stub" %}
  ins_encode %{
    // noreg: no scratch register is supplied for the jump.
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
16842
// CreateEx: the exception oop is produced by stack-crawling runtime code
// and is already in place (rax) just before control reaches the handler,
// so this rule is zero-sized and emits nothing.
instruct CreateException(rax_RegP ex_oop) %{
  match(Set ex_oop (CreateEx));

  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  size(0);
  ins_encode();
  ins_pipe(empty);
%}
16856
// Rethrow: the exception oop arrives in the first argument position;
// control is transferred to the rethrow stub with a JUMP, not a call.
instruct RethrowException() %{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp rethrow_stub" %}
  ins_encode %{
    // noreg: no scratch register is supplied for the jump.
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
16871
16872 // ============================================================================
// ThreadLocal: the thread pointer lives permanently in r15, so this rule
// is zero-sized and emits nothing.
// The instruct name is KNOWN by the ADLC and cannot be changed; the ADLC
// forces a 'TypeRawPtr::BOTTOM' output type for it.
instruct tlsLoadP(r15_RegP dst)
%{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  format %{ "# TLS is in R15" %}
  size(0);
  ins_encode( /* nothing to emit */ );
  ins_pipe(ialu_reg_reg);
%}
16885
// Float add, two-operand form used when UseAVX == 0: dst = dst + src,
// both operands in XMM registers.
instruct addF_reg(regF dst, regF src)
%{
  match(Set dst (AddF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss $dst, $src" %}
  ins_encode %{
    __ addss($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
16897
// Float add, two-operand form used when UseAVX == 0: dst = dst + [src],
// with the LoadF folded into the instruction's memory operand.
instruct addF_mem(regF dst, memory src)
%{
  match(Set dst (AddF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss $dst, $src" %}
  ins_encode %{
    __ addss($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16909
// Float add, two-operand form used when UseAVX == 0: dst = dst + con,
// with the constant read from the constant table.
instruct addF_imm(regF dst, immF con)
%{
  match(Set dst (AddF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16920
// Float add, three-operand form used when UseAVX > 0: dst = src1 + src2,
// all operands in XMM registers.
instruct addF_reg_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
16932
// Float add, three-operand form used when UseAVX > 0: dst = src1 + [src2],
// with the LoadF folded into the instruction's memory operand.
instruct addF_reg_mem(regF dst, regF src1, memory src2)
%{
  match(Set dst (AddF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16944
// Float add, three-operand form used when UseAVX > 0: dst = src + con,
// with the constant read from the constant table.
instruct addF_reg_imm(regF dst, regF src, immF con)
%{
  match(Set dst (AddF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16956
// Double add, two-operand form used when UseAVX == 0: dst = dst + src,
// both operands in XMM registers.
instruct addD_reg(regD dst, regD src)
%{
  match(Set dst (AddD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd $dst, $src" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
16968
// Double add, two-operand form used when UseAVX == 0: dst = dst + [src],
// with the LoadD folded into the instruction's memory operand.
instruct addD_mem(regD dst, memory src)
%{
  match(Set dst (AddD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd $dst, $src" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16980
// Double add, two-operand form used when UseAVX == 0: dst = dst + con,
// with the constant read from the constant table.
instruct addD_imm(regD dst, immD con)
%{
  match(Set dst (AddD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16991
// Double add, three-operand form used when UseAVX > 0: dst = src1 + src2,
// all operands in XMM registers.
instruct addD_reg_reg(regD dst, regD src1, regD src2)
%{
  match(Set dst (AddD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17003
// Double add, three-operand form used when UseAVX > 0: dst = src1 + [src2],
// with the LoadD folded into the instruction's memory operand.
instruct addD_reg_mem(regD dst, regD src1, memory src2)
%{
  match(Set dst (AddD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17015
// Double add, three-operand form used when UseAVX > 0: dst = src + con,
// with the constant read from the constant table.
instruct addD_reg_imm(regD dst, regD src, immD con)
%{
  match(Set dst (AddD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17027
// Float subtract, two-operand form used when UseAVX == 0: dst = dst - src,
// both operands in XMM registers.
instruct subF_reg(regF dst, regF src)
%{
  match(Set dst (SubF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss $dst, $src" %}
  ins_encode %{
    __ subss($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17039
// Float subtract, two-operand form used when UseAVX == 0: dst = dst - [src],
// with the LoadF folded into the instruction's memory operand.
instruct subF_mem(regF dst, memory src)
%{
  match(Set dst (SubF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss $dst, $src" %}
  ins_encode %{
    __ subss($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17051
// Float subtract, two-operand form used when UseAVX == 0: dst = dst - con,
// with the constant read from the constant table.
instruct subF_imm(regF dst, immF con)
%{
  match(Set dst (SubF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17062
// Float subtract, three-operand form used when UseAVX > 0: dst = src1 - src2,
// all operands in XMM registers.
instruct subF_reg_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (SubF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17074
// Float subtract, three-operand form used when UseAVX > 0:
// dst = src1 - [src2], with the LoadF folded into the memory operand.
instruct subF_reg_mem(regF dst, regF src1, memory src2)
%{
  match(Set dst (SubF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17086
// Float subtract, three-operand form used when UseAVX > 0: dst = src - con,
// with the constant read from the constant table.
instruct subF_reg_imm(regF dst, regF src, immF con)
%{
  match(Set dst (SubF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17098
// Double subtract, two-operand form used when UseAVX == 0: dst = dst - src,
// both operands in XMM registers.
instruct subD_reg(regD dst, regD src)
%{
  match(Set dst (SubD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd $dst, $src" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17110
// Double subtract, two-operand form used when UseAVX == 0: dst = dst - [src],
// with the LoadD folded into the instruction's memory operand.
instruct subD_mem(regD dst, memory src)
%{
  match(Set dst (SubD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd $dst, $src" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17122
// Double subtract, two-operand form used when UseAVX == 0: dst = dst - con,
// with the constant read from the constant table.
instruct subD_imm(regD dst, immD con)
%{
  match(Set dst (SubD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17133
// Double subtract, three-operand form used when UseAVX > 0:
// dst = src1 - src2, all operands in XMM registers.
instruct subD_reg_reg(regD dst, regD src1, regD src2)
%{
  match(Set dst (SubD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17145
// Double subtract, three-operand form used when UseAVX > 0:
// dst = src1 - [src2], with the LoadD folded into the memory operand.
instruct subD_reg_mem(regD dst, regD src1, memory src2)
%{
  match(Set dst (SubD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17157
// Double subtract, three-operand form used when UseAVX > 0: dst = src - con,
// with the constant read from the constant table.
instruct subD_reg_imm(regD dst, regD src, immD con)
%{
  match(Set dst (SubD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17169
// Float multiply, two-operand form used when UseAVX == 0: dst = dst * src,
// both operands in XMM registers.
instruct mulF_reg(regF dst, regF src)
%{
  match(Set dst (MulF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss $dst, $src" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17181
// Float multiply, two-operand form used when UseAVX == 0: dst = dst * [src],
// with the LoadF folded into the instruction's memory operand.
instruct mulF_mem(regF dst, memory src)
%{
  match(Set dst (MulF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss $dst, $src" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17193
// Float multiply, two-operand form used when UseAVX == 0: dst = dst * con,
// with the constant read from the constant table.
instruct mulF_imm(regF dst, immF con)
%{
  match(Set dst (MulF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17204
// Float multiply, three-operand form used when UseAVX > 0:
// dst = src1 * src2, all operands in XMM registers.
instruct mulF_reg_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (MulF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17216
// Float multiply, three-operand form used when UseAVX > 0:
// dst = src1 * [src2], with the LoadF folded into the memory operand.
instruct mulF_reg_mem(regF dst, regF src1, memory src2)
%{
  match(Set dst (MulF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17228
// Float multiply, three-operand form used when UseAVX > 0: dst = src * con,
// with the constant read from the constant table.
instruct mulF_reg_imm(regF dst, regF src, immF con)
%{
  match(Set dst (MulF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17240
// Double multiply, two-operand form used when UseAVX == 0: dst = dst * src,
// both operands in XMM registers.
instruct mulD_reg(regD dst, regD src)
%{
  match(Set dst (MulD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17252
// Double multiply, two-operand form used when UseAVX == 0: dst = dst * [src],
// with the LoadD folded into the instruction's memory operand.
instruct mulD_mem(regD dst, memory src)
%{
  match(Set dst (MulD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17264
// Double multiply, two-operand form used when UseAVX == 0: dst = dst * con,
// with the constant read from the constant table.
instruct mulD_imm(regD dst, immD con)
%{
  match(Set dst (MulD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17275
// Double multiply, three-operand form used when UseAVX > 0:
// dst = src1 * src2, all operands in XMM registers.
instruct mulD_reg_reg(regD dst, regD src1, regD src2)
%{
  match(Set dst (MulD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17287
// Double multiply, three-operand form used when UseAVX > 0:
// dst = src1 * [src2], with the LoadD folded into the memory operand.
instruct mulD_reg_mem(regD dst, regD src1, memory src2)
%{
  match(Set dst (MulD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17299
// Double multiply, three-operand form used when UseAVX > 0: dst = src * con,
// with the constant read from the constant table.
instruct mulD_reg_imm(regD dst, regD src, immD con)
%{
  match(Set dst (MulD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17311
// Float divide, two-operand form used when UseAVX == 0: dst = dst / src,
// both operands in XMM registers.
instruct divF_reg(regF dst, regF src)
%{
  match(Set dst (DivF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divss $dst, $src" %}
  ins_encode %{
    __ divss($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17323
// Float divide, two-operand form used when UseAVX == 0: dst = dst / [src],
// with the LoadF folded into the instruction's memory operand.
instruct divF_mem(regF dst, memory src)
%{
  match(Set dst (DivF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divss $dst, $src" %}
  ins_encode %{
    __ divss($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17335
// Float divide, two-operand form used when UseAVX == 0: dst = dst / con,
// with the constant read from the constant table.
instruct divF_imm(regF dst, immF con)
%{
  match(Set dst (DivF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17346
// Float divide, three-operand form used when UseAVX > 0:
// dst = src1 / src2, all operands in XMM registers.
instruct divF_reg_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (DivF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17358
// Float divide, three-operand form used when UseAVX > 0:
// dst = src1 / [src2], with the LoadF folded into the memory operand.
instruct divF_reg_mem(regF dst, regF src1, memory src2)
%{
  match(Set dst (DivF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17370
// Float divide, three-operand form used when UseAVX > 0: dst = src / con,
// with the constant read from the constant table.
instruct divF_reg_imm(regF dst, regF src, immF con)
%{
  match(Set dst (DivF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17382
// Double divide, two-operand form used when UseAVX == 0: dst = dst / src,
// both operands in XMM registers.
instruct divD_reg(regD dst, regD src)
%{
  match(Set dst (DivD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divsd $dst, $src" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17394
// Double divide, two-operand form used when UseAVX == 0: dst = dst / [src],
// with the LoadD folded into the instruction's memory operand.
instruct divD_mem(regD dst, memory src)
%{
  match(Set dst (DivD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divsd $dst, $src" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17406
// Double divide, two-operand form used when UseAVX == 0: dst = dst / con,
// with the constant read from the constant table.
instruct divD_imm(regD dst, immD con)
%{
  match(Set dst (DivD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17417
// Double divide, three-operand form used when UseAVX > 0:
// dst = src1 / src2, all operands in XMM registers.
instruct divD_reg_reg(regD dst, regD src1, regD src2)
%{
  match(Set dst (DivD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17429
// Double divide, three-operand form used when UseAVX > 0:
// dst = src1 / [src2], with the LoadD folded into the memory operand.
instruct divD_reg_mem(regD dst, regD src1, memory src2)
%{
  match(Set dst (DivD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17441
// Double divide by a constant, AVX form (UseAVX > 0): dst = src / con.
// The divisor is read from the constant table.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  ins_cost(150);
  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister quotient_xmm = $dst$$XMMRegister;
    XMMRegister dividend_xmm = $src$$XMMRegister;
    __ vdivsd(quotient_xmm, dividend_xmm, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17453
// Float absolute value, SSE in-place form (UseAVX == 0): ANDs dst with the
// float_signmask() constant.
instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));

  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value_xmm = $dst$$XMMRegister;
    __ andps(value_xmm, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17464
// Float absolute value, AVX form (UseAVX > 0): dst = src AND float_signmask(),
// emitted with the 128-bit vector length encoding.
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));

  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    __ vandps(result_xmm, input_xmm, ExternalAddress(float_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17477
// Double absolute value, SSE in-place form (UseAVX == 0): ANDs dst with the
// double_signmask() constant.
instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));

  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value_xmm = $dst$$XMMRegister;
    __ andpd(value_xmm, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17489
// Double absolute value, AVX form (UseAVX > 0): dst = src AND
// double_signmask(), emitted with the 128-bit vector length encoding.
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));

  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    __ vandpd(result_xmm, input_xmm, ExternalAddress(double_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17503
// Float negate, SSE in-place form (UseAVX == 0): XORs dst with the
// float_signflip() constant.
instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));

  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value_xmm = $dst$$XMMRegister;
    __ xorps(value_xmm, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17514
// Float negate, AVX form (UseAVX > 0): delegates to the vnegatess
// macro-assembler helper with the float_signflip() constant.
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));

  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    __ vnegatess(result_xmm, input_xmm, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17526
// Double negate, SSE in-place form (UseAVX == 0): XORs dst with the
// double_signflip() constant.
instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));

  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value_xmm = $dst$$XMMRegister;
    __ xorpd(value_xmm, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17538
// Double negate, AVX form (UseAVX > 0): delegates to the vnegatesd
// macro-assembler helper with the double_signflip() constant.
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));

  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    __ vnegatesd(result_xmm, input_xmm, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17551
// Float square root, computed in place: dst = sqrt(dst).
// sqrtss performs best when its destination register is already initialized,
// so the only rule provided is the one whose input is pre-loaded into dst.
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));

  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    XMMRegister value_xmm = $dst$$XMMRegister;
    __ sqrtss(value_xmm, value_xmm);
  %}
  ins_pipe(pipe_slow);
%}
17562
// Double square root, computed in place: dst = sqrt(dst).
// sqrtsd performs best when its destination register is already initialized,
// so the only rule provided is the one whose input is pre-loaded into dst.
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));

  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    XMMRegister value_xmm = $dst$$XMMRegister;
    __ sqrtsd(value_xmm, value_xmm);
  %}
  ins_pipe(pipe_slow);
%}
17573
// Scalar float -> half-float conversion into a general-purpose register.
// Delegates to the flt_to_flt16 macro-assembler helper; tmp is an XMM scratch
// register handed to that helper.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  effect(TEMP tmp);
  match(Set dst (ConvF2HF src));

  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_cost(125);
  ins_encode %{
    Register    half_gpr    = $dst$$Register;
    XMMRegister float_xmm   = $src$$XMMRegister;
    XMMRegister scratch_xmm = $tmp$$XMMRegister;
    __ flt_to_flt16(half_gpr, float_xmm, scratch_xmm);
  %}
  ins_pipe( pipe_slow );
%}
17584
// Scalar float -> half-float conversion stored directly to memory
// (StoreC of ConvF2HF). Requires UseAVX > 2 and AVX512VL.
// An opmask holding the value 1 is built in ktmp (via rtmp) and used to
// predicate the 128-bit evcvtps2ph store.
// NOTE(review): presumably the single set mask bit restricts the store to
// the lowest element - confirm against the evcvtps2ph definition.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  match(Set mem (StoreC mem (ConvF2HF src)));

  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register    scratch_gpr = $rtmp$$Register;
    KRegister   lane_mask   = $ktmp$$KRegister;
    XMMRegister float_xmm   = $src$$XMMRegister;

    // Materialize the constant 1 in the opmask register.
    __ movl(scratch_gpr, 0x1);
    __ kmovwl(lane_mask, scratch_gpr);
    __ evcvtps2ph($mem$$Address, lane_mask, float_xmm, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
17597
// Vector float -> half-float cast, register to register.
// The vector length encoding is taken from the source operand.
instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));

  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    const int src_enc = vector_length_encoding(this, $src);
    XMMRegister half_xmm  = $dst$$XMMRegister;
    XMMRegister float_xmm = $src$$XMMRegister;
    __ vcvtps2ph(half_xmm, float_xmm, 0x04, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
17607
// Vector float -> half-float cast fused with the vector store.
// Only selected when the StoreVector writes at least 16 bytes.
// The vector length encoding is taken from the source operand.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));

  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    const int src_enc = vector_length_encoding(this, $src);
    XMMRegister float_xmm = $src$$XMMRegister;
    __ vcvtps2ph($mem$$Address, float_xmm, 0x04, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
17618
// Scalar half-float -> float conversion from a general-purpose register.
// Delegates to the flt16_to_flt macro-assembler helper.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));

  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    XMMRegister float_xmm = $dst$$XMMRegister;
    Register    half_gpr  = $src$$Register;
    __ flt16_to_flt(float_xmm, half_gpr);
  %}
  ins_pipe( pipe_slow );
%}
17627
// Vector half-float -> float cast with the vector load folded in.
// The vector length encoding is that of this (result) node.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));

  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    XMMRegister float_xmm = $dst$$XMMRegister;
    __ vcvtph2ps(float_xmm, $mem$$Address, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
17637
// Vector half-float -> float cast, register to register.
// The vector length encoding is that of this (result) node.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));

  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    XMMRegister float_xmm = $dst$$XMMRegister;
    XMMRegister half_xmm  = $src$$XMMRegister;
    __ vcvtph2ps(float_xmm, half_xmm, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
17648
17649 // ---------------------------------------- VectorReinterpret ------------------------------------
// Reinterpret of an opmask-typed vector whose lane count equals that of its
// input. Input and output share the dst register, so no code is emitted.
instruct reinterpret_mask(kReg dst) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1)));
  match(Set dst (VectorReinterpret dst));

  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // Nothing to emit: the value is already in place.
  %}
  ins_pipe( pipe_slow );
%}
17661
// Reinterpret an opmask with T_SHORT elements as an opmask with T_BYTE
// elements (UseAVX > 2). Selected only when the source and destination lane
// counts differ. The mask is expanded into the vector xtmp with evpmovm2w and
// then converted back to an opmask at byte granularity with evpmovb2m.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    // Both views must cover the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");

    const int size_enc = vector_length_encoding(src_sz);
    XMMRegister expanded_xmm = $xtmp$$XMMRegister;
    __ evpmovm2w(expanded_xmm, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, expanded_xmm, size_enc);
  %}
  ins_pipe( pipe_slow );
%}
17681
// Reinterpret an opmask with T_INT or T_FLOAT elements as an opmask with
// T_BYTE elements (UseAVX > 2). Selected only when the source and destination
// lane counts differ. The mask is expanded into the vector xtmp with
// evpmovm2d and then converted back to an opmask at byte granularity with
// evpmovb2m.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    // Both views must cover the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");

    const int size_enc = vector_length_encoding(src_sz);
    XMMRegister expanded_xmm = $xtmp$$XMMRegister;
    __ evpmovm2d(expanded_xmm, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, expanded_xmm, size_enc);
  %}
  ins_pipe( pipe_slow );
%}
17702
// Reinterpret an opmask with T_LONG or T_DOUBLE elements as an opmask with
// T_BYTE elements (UseAVX > 2). Selected only when the source and destination
// lane counts differ. The mask is expanded into the vector xtmp with
// evpmovm2q and then converted back to an opmask at byte granularity with
// evpmovb2m.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    // Both views must cover the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");

    const int size_enc = vector_length_encoding(src_sz);
    XMMRegister expanded_xmm = $xtmp$$XMMRegister;
    __ evpmovm2q(expanded_xmm, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, expanded_xmm, size_enc);
  %}
  ins_pipe( pipe_slow );
%}
17723
// Reinterpret of a non-mask vector whose byte size equals that of its input.
// Input and output share the dst register, so no code is emitted.
instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorReinterpret dst));

  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // Nothing to emit: the value is already in place.
  %}
  ins_pipe( pipe_slow );
%}
17735
// Widening reinterpret (source smaller than destination) for UseAVX == 0.
// Loads a 32-bit or 64-bit mask constant into dst, chosen by the source
// size, and then ANDs it with src. dst is a TEMP because it is written
// before src is read.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  match(Set dst (VectorReinterpret src));
  effect(TEMP dst);

  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    XMMRegister wide_xmm = $dst$$XMMRegister;
    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes != 4) {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu(wide_xmm, ExternalAddress(vector_64_bit_mask()), noreg);
    } else {
      __ movdqu(wide_xmm, ExternalAddress(vector_32_bit_mask()), noreg);
    }
    __ pand(wide_xmm, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
17758
// Widening reinterpret of a non-mask vector for UseAVX > 0 when the source is
// exactly 4 bytes and smaller than the destination: dst = src AND the 32-bit
// mask constant.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  match(Set dst (VectorReinterpret src));

  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister wide_xmm   = $dst$$XMMRegister;
    XMMRegister narrow_xmm = $src$$XMMRegister;
    __ vpand(wide_xmm, narrow_xmm, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}
17772
17773
// Widening reinterpret of a non-mask vector for UseAVX > 0 when the source is
// larger than 4 bytes and smaller than the destination. Emits a register move
// sized by the source: movq (8 bytes), movdqu (16) or vmovdqu (32).
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  match(Set dst (VectorReinterpret src));

  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister wide_xmm   = $dst$$XMMRegister;
    XMMRegister narrow_xmm = $src$$XMMRegister;
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes == 8) {
      __ movq(wide_xmm, narrow_xmm);
    } else if (src_bytes == 16) {
      __ movdqu(wide_xmm, narrow_xmm);
    } else if (src_bytes == 32) {
      __ vmovdqu(wide_xmm, narrow_xmm);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17792
// Narrowing reinterpret of a non-mask vector (source larger than destination).
// Emits a register move sized by the destination: movfltz (4 bytes),
// movq (8), movdqu (16) or vmovdqu (32).
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n));
  match(Set dst (VectorReinterpret src));

  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister narrow_xmm = $dst$$XMMRegister;
    XMMRegister wide_xmm   = $src$$XMMRegister;
    const int dst_bytes = Matcher::vector_length_in_bytes(this);
    if (dst_bytes == 4) {
      __ movfltz(narrow_xmm, wide_xmm);
    } else if (dst_bytes == 8) {
      __ movq(narrow_xmm, wide_xmm);
    } else if (dst_bytes == 16) {
      __ movdqu(narrow_xmm, wide_xmm);
    } else if (dst_bytes == 32) {
      __ vmovdqu(narrow_xmm, wide_xmm);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17810
17811 // ----------------------------------------------------------------------------------------------------
17812
// Scalar double rounding with an immediate rounding mode (requires
// UseSSE >= 4). Without AVX, dst is zeroed first when it differs from src.
// NOTE(review): the pxor presumably breaks the dependency on the previous
// contents of dst - confirm.
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));

  ins_cost(150);
  format %{ "roundsd $dst,$src" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    const bool clear_dst_first = (UseAVX == 0) && (result_xmm != input_xmm);
    if (clear_dst_first) {
      __ pxor(result_xmm, result_xmm);
    }
    __ roundsd(result_xmm, input_xmm, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
17826
// Scalar double rounding of a constant with an immediate rounding mode
// (requires UseSSE >= 4). The input is read from the constant table.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));

  ins_cost(150);
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result_xmm = $dst$$XMMRegister;
    __ roundsd(result_xmm, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
17837
// Packed double rounding for vectors of fewer than 8 elements, register
// source. Emits vroundpd (requires UseAVX > 0).
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));

  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int result_enc = vector_length_encoding(this);
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    __ vroundpd(result_xmm, input_xmm, $rmode$$constant, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
17849
// Packed double rounding for 8-element vectors, register source.
// Emits a 512-bit vrndscalepd (requires UseAVX > 2).
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));

  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    __ vrndscalepd(result_xmm, input_xmm, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
17860
// Packed double rounding for vectors of fewer than 8 elements with the
// vector load folded in. Emits vroundpd (requires UseAVX > 0).
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));

  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int result_enc = vector_length_encoding(this);
    XMMRegister result_xmm = $dst$$XMMRegister;
    __ vroundpd(result_xmm, $mem$$Address, $rmode$$constant, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
17872
// Packed double rounding for 8-element vectors with the vector load folded
// in. Emits a 512-bit vrndscalepd (requires UseAVX > 2).
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));

  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result_xmm = $dst$$XMMRegister;
    __ vrndscalepd(result_xmm, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
17883
// Spin-wait hint: the OnSpinWait node is implemented as a single pause.
instruct onspinwait() %{
  match(OnSpinWait);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_cost(200);
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
17897
// Double fused multiply-add: c = a * b + c.
// The addend register c is also the destination. Requires UseFMA.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));

  ins_cost(150);
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister lhs_xmm   = $a$$XMMRegister;
    XMMRegister rhs_xmm   = $b$$XMMRegister;
    XMMRegister accum_xmm = $c$$XMMRegister;
    __ fmad(accum_xmm, lhs_xmm, rhs_xmm, accum_xmm);
  %}
  ins_pipe( pipe_slow );
%}
17909
// Float fused multiply-add: c = a * b + c.
// The addend register c is also the destination. Requires UseFMA.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));

  ins_cost(150);
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister lhs_xmm   = $a$$XMMRegister;
    XMMRegister rhs_xmm   = $b$$XMMRegister;
    XMMRegister accum_xmm = $c$$XMMRegister;
    __ fmaf(accum_xmm, lhs_xmm, rhs_xmm, accum_xmm);
  %}
  ins_pipe( pipe_slow );
%}
17921
17922 // ====================VECTOR INSTRUCTIONS=====================================
17923
17924 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder move from the vec operand class to the legVec operand class.
// It has an empty format, and its encoding is never expected to execute:
// reaching it trips ShouldNotReachHere().
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);

  format %{ "" %}
  ins_encode %{
    // This node must never reach code emission.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
17933
// Placeholder move from the legVec operand class to the vec operand class.
// It has an empty format, and its encoding is never expected to execute:
// reaching it trips ShouldNotReachHere().
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);

  format %{ "" %}
  ins_encode %{
    // This node must never reach code emission.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
17942
17943 // ============================================================================
17944
// Generic vector load. The element type and the byte size of this node are
// handed to the load_vector macro-assembler helper, which picks the actual
// instruction.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));

  format %{ "load_vector $dst,$mem" %}
  ins_cost(125);
  ins_encode %{
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    XMMRegister loaded_xmm = $dst$$XMMRegister;
    __ load_vector(elem_type, loaded_xmm, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}
17956
// Generic vector store. The store instruction is selected by the byte size
// of the source vector: movdl (4), movq (8), movdqu (16), vmovdqu (32) or a
// 512-bit evmovdqul (64). Any other size is a matcher bug.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  // The format used to end in "\n\t", which left a dangling blank,
  // tab-indented line after this instruction in disassembly listings.
  format %{ "store_vector $mem,$src" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17974
17975 // ---------------------------------------- Gather ------------------------------------
17976
17977 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
17978
// Vector gather for non-subword element types when AVX512VL is not available
// and the vector is at most 32 bytes.
// The mask register is filled by comparing it with itself for equality,
// the base address is materialized in tmp, and the vgather helper does the
// gather. dst, tmp and mask are all TEMPs.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);

  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    XMMRegister lane_mask_xmm = $mask$$XMMRegister;
    Register    base_gpr      = $tmp$$Register;
    __ vpcmpeqd(lane_mask_xmm, lane_mask_xmm, lane_mask_xmm, result_enc);
    __ lea(base_gpr, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, base_gpr, $idx$$XMMRegister, lane_mask_xmm, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
17995
17996
// Vector gather for non-subword element types using an opmask register.
// Selected when AVX512VL is available or the vector is 64 bytes.
// ktmp is set with kxnorwl of itself, the base address is materialized in
// tmp, and the evgather helper does the gather. dst, tmp and ktmp are TEMPs.
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  // The format previously printed the literal text "ktmp" because the '$'
  // was missing; it now shows the allocated opmask register.
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18012
// Masked vector gather for non-subword element types using an opmask
// register. Selected when AVX512VL is available or the vector is 64 bytes.
// The caller's mask is copied into ktmp, dst is zeroed, the base address is
// materialized in tmp, and the evgather helper does the gather.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  // The format previously printed the literal text "ktmp" because the '$'
  // was missing; it now shows the allocated opmask register.
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is first moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Zero dst before the masked gather.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18033
// Gather of subword elements for vectors of at most 8 bytes.
// The base address is materialized in tmp and the vgather8b helper does the
// work. idx_base is a general-purpose pointer register here, not a vector.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);

  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    Register base_gpr = $tmp$$Register;
    __ lea(base_gpr, $mem$$Address);
    __ vgather8b(elem_type, $dst$$XMMRegister, base_gpr, $idx_base$$Register, $rtmp$$Register, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
18047
// Gather of subword elements for vectors larger than 8 bytes.
// The base address is materialized in tmp, idx_base is copied into
// idx_base_temp, and the copy is handed to the vgather_subword helper. noreg
// is passed in the two argument slots that the masked variants fill with a
// mask register and a mask index register.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);

  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    const int lane_count = Matcher::vector_length(this);
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    Register base_gpr     = $tmp$$Register;
    Register idx_copy_gpr = $idx_base_temp$$Register;

    __ lea(base_gpr, $mem$$Address);
    __ movptr(idx_copy_gpr, $idx_base$$Register);
    __ vgather_subword(elem_type, $dst$$XMMRegister, base_gpr, idx_copy_gpr, noreg,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, noreg, $length$$Register, lane_count, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
18065
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512BW is available; the mask arrives in an opmask register.
// mask_idx is cleared, the base address is materialized in tmp, the opmask
// is copied into the general-purpose rtmp2, and the vgather8b_masked helper
// does the work.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);

  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    Register mask_pos_gpr  = $mask_idx$$Register;
    Register base_gpr      = $tmp$$Register;
    Register mask_bits_gpr = $rtmp2$$Register;

    __ xorq(mask_pos_gpr, mask_pos_gpr);
    __ lea(base_gpr, $mem$$Address);
    __ kmovql(mask_bits_gpr, $mask$$KRegister);
    __ vgather8b_masked(elem_type, $dst$$XMMRegister, base_gpr, $idx_base$$Register,
                        mask_bits_gpr, mask_pos_gpr, $rtmp$$Register, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
18081
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512BW is available; the mask arrives in an opmask register.
// mask_idx is cleared, the base address is materialized in tmp, idx_base is
// copied into idx_base_temp, the opmask is copied into the general-purpose
// rtmp2, and the vgather_subword helper does the work.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);

  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    const int lane_count = Matcher::vector_length(this);
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    Register mask_pos_gpr  = $mask_idx$$Register;
    Register base_gpr      = $tmp$$Register;
    Register idx_copy_gpr  = $idx_base_temp$$Register;
    Register mask_bits_gpr = $rtmp2$$Register;

    __ xorq(mask_pos_gpr, mask_pos_gpr);
    __ lea(base_gpr, $mem$$Address);
    __ movptr(idx_copy_gpr, $idx_base$$Register);
    __ kmovql(mask_bits_gpr, $mask$$KRegister);
    __ vgather_subword(elem_type, $dst$$XMMRegister, base_gpr, idx_copy_gpr, mask_bits_gpr,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, mask_pos_gpr, $length$$Register, lane_count, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
18101
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512VL+BW is not available; the mask arrives in a vector register.
// The vector mask is collapsed to bits in rtmp2 with vpmovmskb. For T_SHORT
// the bits are then filtered with pextl using the 0x55555555 pattern (every
// other bit). mask_idx doubles as the scratch register for that pattern and
// is cleared before the vgather8b_masked helper is called.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);

  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    Register mask_pos_gpr  = $mask_idx$$Register;
    Register base_gpr      = $tmp$$Register;
    Register mask_bits_gpr = $rtmp2$$Register;

    __ lea(base_gpr, $mem$$Address);
    __ vpmovmskb(mask_bits_gpr, $mask$$XMMRegister, result_enc);
    if (elem_type == T_SHORT) {
      // Keep every other mask bit.
      __ movl(mask_pos_gpr, 0x55555555);
      __ pextl(mask_bits_gpr, mask_bits_gpr, mask_pos_gpr);
    }
    __ xorl(mask_pos_gpr, mask_pos_gpr);
    __ vgather8b_masked(elem_type, $dst$$XMMRegister, base_gpr, $idx_base$$Register,
                        mask_bits_gpr, mask_pos_gpr, $rtmp$$Register, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
18121
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512VL+BW is not available; the mask arrives in a vector register.
// The vector mask is collapsed to bits in rtmp2 with vpmovmskb. For T_SHORT
// the bits are then filtered with pextl using the 0x55555555 pattern (every
// other bit). mask_idx doubles as the scratch register for that pattern and
// is cleared before the vgather_subword helper is called.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);

  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    const int lane_count = Matcher::vector_length(this);
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    Register mask_pos_gpr  = $mask_idx$$Register;
    Register base_gpr      = $tmp$$Register;
    Register idx_copy_gpr  = $idx_base_temp$$Register;
    Register mask_bits_gpr = $rtmp2$$Register;

    __ lea(base_gpr, $mem$$Address);
    __ movptr(idx_copy_gpr, $idx_base$$Register);
    __ vpmovmskb(mask_bits_gpr, $mask$$XMMRegister, result_enc);
    if (elem_type == T_SHORT) {
      // Keep every other mask bit.
      __ movl(mask_pos_gpr, 0x55555555);
      __ pextl(mask_bits_gpr, mask_bits_gpr, mask_pos_gpr);
    }
    __ xorl(mask_pos_gpr, mask_pos_gpr);
    __ vgather_subword(elem_type, $dst$$XMMRegister, base_gpr, idx_copy_gpr, mask_bits_gpr,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, mask_pos_gpr, $length$$Register, lane_count, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
18145
18146 // ====================Scatter=======================================
18147
18148 // Scatter INT, LONG, FLOAT, DOUBLE
18149
// Unmasked vector scatter (UseAVX > 2) for non-subword element types and
// source vectors of at least 16 bytes.
// ktmp is loaded from the vector_all_bits_set() constant, the base address is
// materialized in tmp, and the evscatter helper does the scatter.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  // The format previously hard-coded "k2", but the opmask temporary is
  // whatever register the allocator assigns to ktmp.
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18168
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  // Masked scatter store of a non-subword vector (see the asserts below).
  // $mask is copied into $ktmp before use; $tmp receives the address of $mem.
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18186
18187 // ====================REPLICATE=======================================
18188
18189 // Replicate byte scalar to be vector
instruct vReplB_reg(vec dst, rRegI src) %{
  // Replicate the byte in general-purpose register $src into every lane of $dst.
  // The instruction sequence is picked from UseAVX, CPU features and the lane count.
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_gpr = $src$$Register;
    if (UseAVX >= 2) {
      const int enc = vector_length_encoding(this);
      // AVX512VL for <512bit operands
      const bool from_gpr = (lane_cnt == 64) || VM_Version::supports_avx512vlbw();
      if (from_gpr) {
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb(dst_xmm, src_gpr, enc);
      } else {
        // Move the scalar into $dst first, then broadcast from $dst itself.
        __ movdl(dst_xmm, src_gpr);
        __ vpbroadcastb(dst_xmm, dst_xmm, enc);
      }
    } else {
      assert(UseAVX < 2, "");
      // Shuffle-based sequence: movdl, punpcklbw, pshuflw; punpcklqdq is added
      // only for the 16-lane case.
      __ movdl(dst_xmm, src_gpr);
      __ punpcklbw(dst_xmm, dst_xmm);
      __ pshuflw(dst_xmm, dst_xmm, 0x00);
      if (lane_cnt >= 16) {
        assert(lane_cnt == 16, "");
        __ punpcklqdq(dst_xmm, dst_xmm);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18218
instruct ReplB_mem(vec dst, memory mem) %{
  // Replicate a byte loaded from $mem (LoadB folded into the match) into $dst
  // with a single vpbroadcastb from memory.
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ vpbroadcastb(dst_xmm, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
18229
18230 // ====================ReplicateS=======================================
18231
instruct vReplS_reg(vec dst, rRegI src) %{
  // Replicate the short in general-purpose register $src into every lane of $dst.
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const int  enc      = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_gpr = $src$$Register;
    if (UseAVX >= 2) {
      // AVX512VL for <512bit operands
      const bool from_gpr = (lane_cnt == 32) || VM_Version::supports_avx512vlbw();
      if (from_gpr) {
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        __ evpbroadcastw(dst_xmm, src_gpr, enc);
      } else {
        // Move the scalar into $dst first, then broadcast from $dst itself.
        __ movdl(dst_xmm, src_gpr);
        __ vpbroadcastw(dst_xmm, dst_xmm, enc);
      }
    } else {
      assert(UseAVX < 2, "");
      // Shuffle-based sequence; punpcklqdq is added only for the 8-lane case.
      __ movdl(dst_xmm, src_gpr);
      __ pshuflw(dst_xmm, dst_xmm, 0x00);
      if (lane_cnt >= 8) {
        assert(lane_cnt == 8, "");
        __ punpcklqdq(dst_xmm, dst_xmm);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18259
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  // Replicate an immH constant: the constant is moved into the integer temp
  // $rtmp and then broadcast as 16-bit lanes with evpbroadcastw.
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && elem_bt == T_SHORT, "");
    Register con_gpr = $rtmp$$Register;
    __ movl(con_gpr, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, con_gpr, enc);
  %}
  ins_pipe( pipe_slow );
%}
18273
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  // Replicate the value held in XMM register $src as 16-bit lanes: evmovw
  // transfers it to the integer temp $rtmp, evpbroadcastw broadcasts from there.
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    Register bits_gpr = $rtmp$$Register;
    __ evmovw(bits_gpr, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, bits_gpr, enc);
  %}
  ins_pipe( pipe_slow );
%}
18286
instruct ReplS_mem(vec dst, memory mem) %{
  // Replicate a short loaded from $mem (LoadS folded into the match) into $dst
  // with a single vpbroadcastw from memory.
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ vpbroadcastw(dst_xmm, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
18297
18298 // ====================ReplicateI=======================================
18299
instruct ReplI_reg(vec dst, rRegI src) %{
  // Replicate the int in general-purpose register $src into every lane of $dst.
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const int  enc      = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_gpr = $src$$Register;
    // AVX512VL for <512bit operands
    const bool from_gpr = (lane_cnt == 16) || VM_Version::supports_avx512vl();
    if (from_gpr) {
      __ evpbroadcastd(dst_xmm, src_gpr, enc);
    } else {
      // Both remaining sequences start by moving the scalar into $dst.
      __ movdl(dst_xmm, src_gpr);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastd(dst_xmm, dst_xmm, enc);
      } else {
        __ pshufd(dst_xmm, dst_xmm, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18319
instruct ReplI_mem(vec dst, memory mem) %{
  // Replicate an int loaded from $mem (LoadI folded into the match) into $dst.
  // Feature checks are tried in order: AVX2, then AVX, then the movdl + pshufd sequence.
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd(dst_xmm, $mem$$Address, enc);
    } else if (VM_Version::supports_avx()) {
      __ vbroadcastss(dst_xmm, $mem$$Address, enc);
    } else {
      __ movdl(dst_xmm, $mem$$Address);
      __ pshufd(dst_xmm, dst_xmm, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}
18337
instruct ReplI_imm(vec dst, immI con) %{
  // Replicate an int immediate for non-long integral vectors by loading a
  // pre-replicated constant (built with vreplicate_imm) via load_constant_vector.
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    const BasicType elem_bt    = Matcher::vector_element_basic_type(this);
    const int       vlen_bytes = Matcher::vector_length_in_bytes(this);
    // The constant's replication count is a byte size (4, 8 or 16, chosen from
    // SSE3/AVX support) divided by the element size.
    // NOTE(review): the $constantaddress argument is deliberately left self-contained
    // (it does not use the locals above) -- presumably adlc evaluates it outside
    // this encode block; confirm before hoisting anything into it.
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                   type2aelembytes(Matcher::vector_element_basic_type(this))));
    __ load_constant_vector(elem_bt, $dst$$XMMRegister, addr, vlen_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18352
18353 // Replicate scalar zero to be vector
instruct ReplI_zero(vec dst, immI_0 zero) %{
  // Replicate integer zero (non-long integral vectors) by XOR-ing $dst with itself.
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    // pxor is used unless the CPU has EVEX without AVX512VL; only that
    // combination takes the vpxor form with an explicit vector length.
    const bool use_pxor = !VM_Version::supports_evex() || VM_Version::supports_avx512vl();
    if (use_pxor) {
      __ pxor(dst_xmm, dst_xmm);
    } else {
      __ vpxor(dst_xmm, dst_xmm, dst_xmm, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18368
instruct ReplI_M1(vec dst, immI_M1 con) %{
  // Replicate the immI_M1 constant (non-long integral vectors) by delegating
  // to the vallones helper.
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18379
18380 // ====================ReplicateL=======================================
18381
18382 // Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, rRegL src) %{
  // Replicate the long in general-purpose register $src into every lane of $dst.
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    const int enc      = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_gpr = $src$$Register;
    // AVX512VL for <512bit operands
    const bool from_gpr = (lane_cnt == 8) || VM_Version::supports_avx512vl();
    if (from_gpr) {
      __ evpbroadcastq(dst_xmm, src_gpr, enc);
    } else {
      // Both remaining sequences start by moving the scalar into $dst.
      __ movdq(dst_xmm, src_gpr);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastq(dst_xmm, dst_xmm, enc);
      } else {
        __ punpcklqdq(dst_xmm, dst_xmm);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18402
instruct ReplL_mem(vec dst, memory mem) %{
  // Replicate a long loaded from $mem (LoadL folded into the match) into $dst.
  // Feature checks are tried in order: AVX2, then SSE3, then the movq + punpcklqdq sequence.
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq(dst_xmm, $mem$$Address, enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup(dst_xmm, $mem$$Address);
    } else {
      __ movq(dst_xmm, $mem$$Address);
      __ punpcklqdq(dst_xmm, dst_xmm);
    }
  %}
  ins_pipe( pipe_slow );
%}
18420
18421 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplL_imm(vec dst, immL con) %{
  // Replicate a long immediate by loading a constant built with vreplicate_imm
  // (replication count 1 with SSE3, otherwise 2) via load_constant_vector.
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    const int vlen_bytes = Matcher::vector_length_in_bytes(this);
    // NOTE(review): keep the $constantaddress argument self-contained (no locals) --
    // presumably adlc evaluates it outside this encode block; confirm before changing.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18433
instruct ReplL_zero(vec dst, immL0 zero) %{
  // Replicate long zero by XOR-ing $dst with itself.
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    // pxor is used unless the CPU has EVEX without AVX512VL; only that
    // combination takes the vpxor form with an explicit vector length.
    const bool use_pxor = !VM_Version::supports_evex() || VM_Version::supports_avx512vl();
    if (use_pxor) {
      __ pxor(dst_xmm, dst_xmm);
    } else {
      __ vpxor(dst_xmm, dst_xmm, dst_xmm, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18448
instruct ReplL_M1(vec dst, immL_M1 con) %{
  // Replicate the immL_M1 constant into a long vector by delegating to the
  // vallones helper.
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18459
18460 // ====================ReplicateF=======================================
18461
instruct vReplF_reg(vec dst, vlRegF src) %{
  // AVX variant: replicate the float in XMM register $src into every lane of $dst.
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const int  enc      = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    const bool more_than_4 = lane_cnt > 4;
    if (more_than_4 && VM_Version::supports_avx2()) {
      __ vbroadcastss(dst_xmm, src_xmm, enc); // reg-to-reg variant requires AVX2
    } else {
      // Without AVX2 only the 8-lane case is expected beyond 4 lanes.
      assert(!more_than_4 || lane_cnt == 8, "sanity");
      // Fill the low 128 bits first ...
      __ vpermilps(dst_xmm, src_xmm, 0x00, Assembler::AVX_128bit);
      if (more_than_4) {
        // ... then apply vinsertf128_high with $dst as both operands.
        __ vinsertf128_high(dst_xmm, dst_xmm);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18481
instruct ReplF_reg(vec dst, vlRegF src) %{
  // Non-AVX variant (UseAVX == 0): replicate the float in $src with a single
  // pshufd using shuffle control 0x00.
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ pshufd(dst_xmm, src_xmm, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
18491
instruct ReplF_mem(vec dst, memory mem) %{
  // Replicate a float loaded from $mem (LoadF folded into the match) into $dst
  // with a single vbroadcastss from memory.
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ vbroadcastss(dst_xmm, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
18502
18503 // Replicate float scalar immediate to be vector by loading from const table.
instruct ReplF_imm(vec dst, immF con) %{
  // Replicate a float immediate by loading a constant built with vreplicate_imm
  // (replication count 1, 2 or 4, chosen from SSE3/AVX support) via load_constant_vector.
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    const int vlen_bytes = Matcher::vector_length_in_bytes(this);
    // NOTE(review): keep the $constantaddress argument self-contained (no locals) --
    // presumably adlc evaluates it outside this encode block; confirm before changing.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18516
instruct ReplF_zero(vec dst, immF0 zero) %{
  // Replicate float zero by XOR-ing $dst with itself.
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    // xorps is used unless the CPU has EVEX without AVX512VL+DQ; only that
    // combination takes the vpxor form with an explicit vector length.
    const bool use_xorps = !VM_Version::supports_evex() || VM_Version::supports_avx512vldq();
    if (use_xorps) {
      __ xorps(dst_xmm, dst_xmm);
    } else {
      __ vpxor(dst_xmm, dst_xmm, dst_xmm, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18531
18532 // ====================ReplicateD=======================================
18533
18534 // Replicate double (8 bytes) scalar to be vector
instruct vReplD_reg(vec dst, vlRegD src) %{
  // SSE3+ variant: replicate the double in XMM register $src into every lane of $dst.
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const int  enc      = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    const bool more_than_2 = lane_cnt > 2;
    if (more_than_2 && VM_Version::supports_avx2()) {
      __ vbroadcastsd(dst_xmm, src_xmm, enc); // reg-to-reg variant requires AVX2
    } else {
      // Without AVX2 only the 4-lane case is expected beyond 2 lanes.
      assert(!more_than_2 || lane_cnt == 4, "sanity");
      // Fill the low 128 bits first ...
      __ movddup(dst_xmm, src_xmm);
      if (more_than_2) {
        // ... then apply vinsertf128_high with $dst as both operands.
        __ vinsertf128_high(dst_xmm, dst_xmm);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18554
instruct ReplD_reg(vec dst, vlRegD src) %{
  // Pre-SSE3 variant (UseSSE < 3): replicate the double in $src with a single
  // pshufd using shuffle control 0x44.
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ pshufd(dst_xmm, src_xmm, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
18564
instruct ReplD_mem(vec dst, memory mem) %{
  // Replicate a double loaded from $mem (LoadD folded into the match) into $dst:
  // movddup below 4 lanes, vbroadcastsd for 4 lanes and up.
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    if (Matcher::vector_length(this) < 4) {
      __ movddup(dst_xmm, $mem$$Address);
    } else {
      const int enc = vector_length_encoding(this);
      __ vbroadcastsd(dst_xmm, $mem$$Address, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18579
18580 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplD_imm(vec dst, immD con) %{
  // Replicate a double immediate by loading a constant built with vreplicate_imm
  // (replication count 1 with SSE3, otherwise 2) via load_constant_vector.
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    const int vlen_bytes = Matcher::vector_length_in_bytes(this);
    // NOTE(review): keep the $constantaddress argument self-contained (no locals) --
    // presumably adlc evaluates it outside this encode block; confirm before changing.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18592
instruct ReplD_zero(vec dst, immD0 zero) %{
  // Replicate double zero by XOR-ing $dst with itself.
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    // xorps is used unless the CPU has EVEX without AVX512VL+DQ; only that
    // combination takes the vpxor form with an explicit vector length.
    const bool use_xorps = !VM_Version::supports_evex() || VM_Version::supports_avx512vldq();
    if (use_xorps) {
      __ xorps(dst_xmm, dst_xmm);
    } else {
      __ vpxor(dst_xmm, dst_xmm, dst_xmm, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18607
18608 // ====================VECTOR INSERT=======================================
18609
instruct insert(vec dst, rRegI val, immU8 idx) %{
  // In-place insert of the integer in $val at constant lane $idx for vectors
  // shorter than 32 bytes ($dst is both input and result of the match rule).
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    const BasicType lane_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(lane_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    val_gpr = $val$$Register;
    __ insert(lane_bt, dst_xmm, val_gpr, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18627
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  // Insert the integer in $val at constant lane $idx of a 32-byte vector.
  // The 128-bit lane holding the element is extracted into $vtmp, patched there,
  // and written back into a copy of $src that lands in $dst.
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt); // elements per 16-byte lane
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: element position inside the 128-bit lane; y_idx: which of the two lanes.
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18650
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  // Insert the integer in $val at constant lane $idx of a 64-byte vector.
  // The 128-bit lane holding the element is extracted into $vtmp, patched there,
  // and written back into a copy of $src that lands in $dst.
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int per_128 = 16/type2aelembytes(lane_bt); // elements per 16-byte lane
    const int shift   = log2(per_128);

    assert(is_integral_type(lane_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // within_lane: element position inside the 128-bit lane; lane_no: which of the four lanes.
    const uint within_lane = $idx$$constant & right_n_bits(shift);
    const uint lane_no     = ($idx$$constant >> shift) & 3;
    XMMRegister src_xmm = $src$$XMMRegister;
    XMMRegister tmp_xmm = $vtmp$$XMMRegister;
    __ vextracti32x4(tmp_xmm, src_xmm, lane_no);
    __ vinsert(lane_bt, tmp_xmm, tmp_xmm, $val$$Register, within_lane);
    __ vinserti32x4($dst$$XMMRegister, src_xmm, tmp_xmm, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
18674
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  // In-place insert of the long in $val at constant lane $idx of a 2-lane
  // vector, using a single pinsrq.
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    val_gpr = $val$$Register;
    __ pinsrq(dst_xmm, val_gpr, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18688
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  // Insert the long in $val at constant lane $idx of a 4-lane vector.
  // The 128-bit lane holding the element is extracted into $vtmp, patched with
  // vpinsrq, and written back into a copy of $src that lands in $dst.
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: position inside the 128-bit lane (bit 0 of idx); y_idx: which of the two lanes.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18707
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  // Insert the long in $val at constant lane $idx of an 8-lane vector.
  // The 128-bit lane holding the element is extracted into $vtmp, patched with
  // vpinsrq, and written back into a copy of $src that lands in $dst.
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // within_lane: position inside the 128-bit lane; lane_no: which of the four lanes.
    const uint within_lane = $idx$$constant & right_n_bits(1);
    const uint lane_no     = ($idx$$constant >> 1) & 3;
    XMMRegister src_xmm = $src$$XMMRegister;
    XMMRegister tmp_xmm = $vtmp$$XMMRegister;
    __ vextracti32x4(tmp_xmm, src_xmm, lane_no);
    __ vpinsrq(tmp_xmm, tmp_xmm, $val$$Register, within_lane);
    __ vinserti32x4($dst$$XMMRegister, src_xmm, tmp_xmm, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
18725
instruct insertF(vec dst, regF val, immU8 idx) %{
  // In-place insert of the float in $val at constant lane $idx for vectors
  // with fewer than 8 lanes, using a single insertps.
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // The low two bits of idx, shifted left by 4, form the insertps immediate.
    const uint lane_sel = $idx$$constant & right_n_bits(2);
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister val_xmm = $val$$XMMRegister;
    __ insertps(dst_xmm, val_xmm, lane_sel << 4);
  %}
  ins_pipe( pipe_slow );
%}
18741
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  // Insert the float in $val at constant lane $idx of an 8- or 16-lane vector.
  // The 128-bit lane holding the element is extracted into $vtmp, patched with
  // vinsertps, and written back into a copy of $src that lands in $dst.
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    // x_idx: position inside the 128-bit lane (low two bits of idx).
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      // 8 lanes: y_idx picks one of two 128-bit lanes.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      // 16 lanes: y_idx picks one of four 128-bit lanes.
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
18769
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  // In-place insert of the double in $val at constant lane $idx of a 2-lane
  // vector: the value is moved through the general-purpose temp $tmp and
  // inserted with pinsrq.
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    Register bits_gpr = $tmp$$Register;
    __ movq(bits_gpr, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, bits_gpr, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18785
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  // Insert the double in $val at constant lane $idx of a 4-lane vector.
  // The value is moved through the general-purpose temp $tmp; the 128-bit lane
  // holding the element is extracted into $vtmp, patched with vpinsrq, and
  // written back into a copy of $src that lands in $dst.
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: position inside the 128-bit lane (bit 0 of idx); y_idx: which of the two lanes.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18805
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  // Insert the double in $val at constant lane $idx of an 8-lane vector.
  // The value is moved through the general-purpose temp $tmp; the 128-bit lane
  // holding the element is extracted into $vtmp, patched with vpinsrq, and
  // written back into a copy of $src that lands in $dst.
  // NOTE(review): $idx is declared immI here while the sibling insert rules use
  // immU8 -- left unchanged; confirm whether the difference is intentional.
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: position inside the 128-bit lane (bit 0 of idx); y_idx: which of the four lanes.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18824
18825 // ====================REDUCTION ARITHMETIC=======================================
18826
18827 // =======================Int Reduction==========================================
18828
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Int-vector reductions: one rule covers all the ideal reduction nodes listed
  // below. The ideal opcode of the matched node is passed on to the reduceI helper.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    XMMRegister vec_in = $src2$$XMMRegister;
    __ reduceI(ideal_op, lane_cnt, $dst$$Register, $src1$$Register, vec_in,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18849
18850 // =======================Long Reduction==========================================
18851
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Long-vector reductions when AVX512DQ is not available (legVec operands).
  // The ideal opcode of the matched node is passed on to the reduceL helper.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    XMMRegister vec_in = $src2$$XMMRegister;
    __ reduceL(ideal_op, lane_cnt, $dst$$Register, $src1$$Register, vec_in,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18872
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Long-vector reductions when AVX512DQ is available; same encoding as
  // reductionL but with vec (rather than legVec) operands.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    XMMRegister vec_in = $src2$$XMMRegister;
    __ reduceL(ideal_op, lane_cnt, $dst$$Register, $src1$$Register, vec_in,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18893
18894 // =======================Float Reduction==========================================
18895
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  // Strictly ordered float add/mul reduction for vectors of at most 4 lanes.
  // $dst is both an input of the match rule and its result.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    XMMRegister acc_xmm = $dst$$XMMRegister;
    __ reduce_fp(ideal_op, lane_cnt, acc_xmm, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18909
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  // Strictly ordered float add/mul reduction for 8-lane vectors; needs two
  // vector temporaries. $dst is both an input of the match rule and its result.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    XMMRegister acc_xmm = $dst$$XMMRegister;
    __ reduce_fp(ideal_op, lane_cnt, acc_xmm, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18923
// Float add/mul reduction for nodes that require strict ordering and whose
// vector input has exactly sixteen lanes. The vector operands use the legVec
// operand class. dst is both scalar input and result.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 16); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18937
18938
// Two-lane float add/mul reduction for nodes that do not require strict
// ordering. The rule is intended for the VectorAPI, which allows non-strictly
// ordered add/mul reductions. src1 contains the reduction identity and is not
// passed to the encoder.
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_lanes, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18955
// Four-lane float add/mul reduction for nodes that do not require strict
// ordering. The rule is intended for the VectorAPI, which allows non-strictly
// ordered add/mul reductions. src1 contains the reduction identity and is not
// passed to the encoder; one scratch vector register is used.
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18972
// Eight-lane float add/mul reduction for nodes that do not require strict
// ordering. The rule is intended for the VectorAPI, which allows non-strictly
// ordered add/mul reductions. src1 contains the reduction identity and is not
// passed to the encoder; two scratch vector registers are used.
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18989
// Sixteen-lane float add/mul reduction for nodes that do not require strict
// ordering. The rule is intended for the VectorAPI, which allows non-strictly
// ordered add/mul reductions. src1 contains the reduction identity and is not
// passed to the encoder. Vector operands use the legVec operand class.
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 16); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19006
19007 // =======================Double Reduction==========================================
19008
// Double add/mul reduction for nodes that require strict ordering and whose
// vector input has exactly two lanes. dst appears on both sides of the match
// rule, so it is the scalar input as well as the result.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19022
// Double add/mul reduction for nodes that require strict ordering and whose
// vector input has exactly four lanes. dst is both scalar input and result;
// two scratch vector registers are passed to reduce_fp.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19036
// Double add/mul reduction for nodes that require strict ordering and whose
// vector input has exactly eight lanes. Vector operands use the legVec operand
// class. dst is both scalar input and result.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19050
// Two-lane double add/mul reduction for nodes that do not require strict
// ordering. The rule is intended for the VectorAPI, which allows non-strictly
// ordered add/mul reductions. src1 contains the reduction identity and is not
// passed to the encoder.
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_lanes, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19067
// Four-lane double add/mul reduction for nodes that do not require strict
// ordering. The rule is intended for the VectorAPI, which allows non-strictly
// ordered add/mul reductions. src1 contains the reduction identity and is not
// passed to the encoder; one scratch vector register is used.
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19084
// Eight-lane double add/mul reduction for nodes that do not require strict
// ordering. The rule is intended for the VectorAPI, which allows non-strictly
// ordered add/mul reductions. src1 contains the reduction identity and is not
// passed to the encoder. Vector operands use the legVec operand class.
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19101
19102 // =======================Byte Reduction==========================================
19103
// Byte reduction rule used when AVX512BW is NOT supported. Covers add, and,
// or, xor plus signed and unsigned min/max on vectors whose elements are
// T_BYTE. Vector operands use the legVec operand class; the scalar src1 and
// the vector src2 are handed to the reduceB helper.
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceB(ideal_op, num_lanes,
               $dst$$Register, $src1$$Register,
               $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19123
// Byte reduction rule used when AVX512BW IS supported. Same operations and
// helper (reduceB) as the non-AVX512BW rule, but the vector operands use the
// vec operand class.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceB(ideal_op, num_lanes,
               $dst$$Register, $src1$$Register,
               $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19143
19144 // =======================Short Reduction==========================================
19145
// Short reduction rule: add, mul, and, or, xor plus signed and unsigned
// min/max on vectors whose elements are T_SHORT. The scalar src1 and the
// vector src2 are handed to the reduceS helper with two scratch registers.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // in(2) is src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst (OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceS(ideal_op, num_lanes,
               $dst$$Register, $src1$$Register,
               $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19166
19167 // =======================Mul Reduction==========================================
19168
// Byte multiply reduction for vectors of at most 32 lanes. dst is declared
// TEMP in addition to the two scratch vector registers; the work is done by
// the mulreduceB helper.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // in(2) is src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, num_lanes,
                  $dst$$Register, $src1$$Register,
                  $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19182
// Byte multiply reduction for vectors of exactly 64 lanes. Unlike the rule for
// shorter vectors, the vector operands use the legVec operand class. The work
// is done by the mulreduceB helper.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // in(2) is src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, num_lanes,
                  $dst$$Register, $src1$$Register,
                  $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19196
19197 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Two-lane float min/max reduction for CPUs without AVX10.2.
// src1 is an immediate that the predicate restricts to +Inf for MinReductionV
// and -Inf for MaxReductionV; it is not passed to reduceFloatMinMax, which is
// called with its third argument set to false. The flags register is killed.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(ideal_op, num_lanes, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19219
// Float min/max reduction over four or more lanes for CPUs without AVX10.2.
// src1 is an immediate that the predicate restricts to +Inf for MinReductionV
// and -Inf for MaxReductionV; it is not passed to reduceFloatMinMax, which is
// called with its third argument set to false. The flags register is killed.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(ideal_op, num_lanes, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19240
// Two-lane float min/max reduction for CPUs without AVX10.2 where dst is both
// the scalar input and the result (it appears on both sides of the match rule).
// reduceFloatMinMax is called with its third argument set to true.
// NOTE(review): that flag presumably marks dst as holding a valid incoming
// value; confirm against the helper. The flags register is killed.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, num_lanes, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19259
19260
// Float min/max reduction over four or more lanes for CPUs without AVX10.2
// where dst is both the scalar input and the result. reduceFloatMinMax is
// called with its third argument set to true.
// NOTE(review): that flag presumably marks dst as holding a valid incoming
// value; confirm against the helper. The flags register is killed.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, num_lanes, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19279
// Two-lane float min/max reduction for CPUs with AVX10.2.
// src1 is an immediate that the predicate restricts to +Inf for MinReductionV
// and -Inf for MaxReductionV; it is not passed to reduceFloatMinMax. The
// helper is called with its third argument set to false, xnoreg in the three
// temp slots used by the pre-AVX10.2 rules, and a single scratch register.
// The format label names the 2F variant, matching the other two-lane float
// min/max rules in this file.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int opcode = this->ideal_Opcode();
    const int vlen   = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19297
// Float min/max reduction over four or more lanes for CPUs with AVX10.2.
// src1 is an immediate that the predicate restricts to +Inf for MinReductionV
// and -Inf for MaxReductionV; it is not passed to reduceFloatMinMax. The
// helper is called with its third argument set to false, xnoreg in the three
// temp slots used by the pre-AVX10.2 rules, and two scratch registers.
// The format label names the F variant, matching the other multi-lane float
// min/max rules in this file.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int opcode = this->ideal_Opcode();
    const int vlen   = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19315
// Two-lane float min/max reduction for CPUs with AVX10.2 where dst is both the
// scalar input and the result. reduceFloatMinMax is called with its third
// argument set to true, xnoreg in three temp slots and one scratch register.
// NOTE(review): the true flag presumably marks dst as holding a valid incoming
// value; confirm against the helper.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, num_lanes, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19331
// Float min/max reduction over four or more lanes for CPUs with AVX10.2 where
// dst is both the scalar input and the result. reduceFloatMinMax is called
// with its third argument set to true, xnoreg in three temp slots and two
// scratch registers.
// NOTE(review): the true flag presumably marks dst as holding a valid incoming
// value; confirm against the helper.
// The format label uses the F (multi-lane) name; the previous 2F label was a
// copy of the two-lane rule and mislabelled this rule in debug output.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int opcode = this->ideal_Opcode();
    const int vlen   = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19347
//--------------------Min/Max Double Reduction --------------------
// Two-lane double min/max reduction for CPUs without AVX10.2.
// src1 is an immediate that the predicate restricts to +Inf for MinReductionV
// and -Inf for MaxReductionV; it is not passed to reduceDoubleMinMax, which is
// called with its third argument set to false. The flags register is killed.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, num_lanes, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19369
// Double min/max reduction over four or more lanes for CPUs without AVX10.2.
// src1 is an immediate that the predicate restricts to +Inf for MinReductionV
// and -Inf for MaxReductionV; it is not passed to reduceDoubleMinMax, which is
// called with its third argument set to false. The flags register is killed.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, num_lanes, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19390
19391
// Two-lane double min/max reduction for CPUs without AVX10.2 where dst is both
// the scalar input and the result (it appears on both sides of the match rule).
// reduceDoubleMinMax is called with its third argument set to true.
// NOTE(review): that flag presumably marks dst as holding a valid incoming
// value; confirm against the helper. The flags register is killed.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, num_lanes, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19410
// Double min/max reduction over four or more lanes for CPUs without AVX10.2
// where dst is both the scalar input and the result. reduceDoubleMinMax is
// called with its third argument set to true.
// NOTE(review): that flag presumably marks dst as holding a valid incoming
// value; confirm against the helper. The flags register is killed.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, num_lanes, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19429
// Two-lane double min/max reduction for CPUs with AVX10.2.
// src1 is an immediate that the predicate restricts to +Inf for MinReductionV
// and -Inf for MaxReductionV; it is not passed to reduceDoubleMinMax. The
// helper is called with its third argument set to false, xnoreg in three temp
// slots and one scratch register.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, num_lanes, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19447
// Double min/max reduction over four or more lanes for CPUs with AVX10.2.
// src1 is an immediate that the predicate restricts to +Inf for MinReductionV
// and -Inf for MaxReductionV; it is not passed to reduceDoubleMinMax. The
// helper is called with its third argument set to false, xnoreg in three temp
// slots and two scratch registers.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, num_lanes, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19465
19466
// Two-lane double min/max reduction for CPUs with AVX10.2 where dst is both
// the scalar input and the result. reduceDoubleMinMax is called with its third
// argument set to true, xnoreg in three temp slots and one scratch register.
// NOTE(review): the true flag presumably marks dst as holding a valid incoming
// value; confirm against the helper.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, num_lanes, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19482
// Double min/max reduction over four or more lanes for CPUs with AVX10.2 where
// dst is both the scalar input and the result. reduceDoubleMinMax is called
// with its third argument set to true, xnoreg in three temp slots and two
// scratch registers.
// NOTE(review): the true flag presumably marks dst as holding a valid incoming
// value; confirm against the helper.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, num_lanes, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19498
19499 // ====================VECTOR ARITHMETIC=======================================
19500
19501 // --------------------------------- ADD --------------------------------------
19502
19503 // Bytes vector add
// Packed byte add used when UseAVX is 0. dst is both the first input and the
// result of the match rule; the encoding is a two-operand paddb.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19513
// Packed byte add used when UseAVX is greater than 0. Three-operand vpaddb
// with the vector length encoding derived from this node.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
19524
// Packed byte add with the second input loaded directly from memory. Applies
// when UseAVX is greater than 0 and the first input is wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
19536
19537 // Shorts/Chars vector add
// Packed short/char add used when UseAVX is 0. dst is both the first input and
// the result of the match rule; the encoding is a two-operand paddw.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19547
// Packed short/char add used when UseAVX is greater than 0. Three-operand
// vpaddw with the vector length encoding derived from this node.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
19558
// AVX rule that takes the second AddVS input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19559 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19560 predicate((UseAVX > 0) &&
19561 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19562 match(Set dst (AddVS src (LoadVector mem)));
19563 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
19564 ins_encode %{
19565 int vlen_enc = vector_length_encoding(this);
19566 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19567 %}
19568 ins_pipe( pipe_slow );
19569 %}
19570
19571 // Integers vector add
// SSE-only rule (UseAVX == 0): dst is both the first AddVI input and the
// result; paddd adds src into dst in place.
19572 instruct vaddI(vec dst, vec src) %{
19573 predicate(UseAVX == 0);
19574 match(Set dst (AddVI dst src));
19575 format %{ "paddd $dst,$src\t! add packedI" %}
19576 ins_encode %{
19577 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19578 %}
19579 ins_pipe( pipe_slow );
19580 %}
19581
// AVX rule (UseAVX > 0): three-operand vpaddd, dst = src1 + src2 per 32-bit
// lane, encoded at the width given by vector_length_encoding(this).
19582 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19583 predicate(UseAVX > 0);
19584 match(Set dst (AddVI src1 src2));
19585 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
19586 ins_encode %{
19587 int vlen_enc = vector_length_encoding(this);
19588 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19589 %}
19590 ins_pipe( pipe_slow );
19591 %}
19592
19593
// AVX rule that takes the second AddVI input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19594 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19595 predicate((UseAVX > 0) &&
19596 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19597 match(Set dst (AddVI src (LoadVector mem)));
19598 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
19599 ins_encode %{
19600 int vlen_enc = vector_length_encoding(this);
19601 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19602 %}
19603 ins_pipe( pipe_slow );
19604 %}
19605
19606 // Longs vector add
// SSE-only rule (UseAVX == 0): dst is both the first AddVL input and the
// result; paddq adds src into dst in place.
19607 instruct vaddL(vec dst, vec src) %{
19608 predicate(UseAVX == 0);
19609 match(Set dst (AddVL dst src));
19610 format %{ "paddq $dst,$src\t! add packedL" %}
19611 ins_encode %{
19612 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19613 %}
19614 ins_pipe( pipe_slow );
19615 %}
19616
// AVX rule (UseAVX > 0): three-operand vpaddq, dst = src1 + src2 per 64-bit
// lane, encoded at the width given by vector_length_encoding(this).
19617 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19618 predicate(UseAVX > 0);
19619 match(Set dst (AddVL src1 src2));
19620 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
19621 ins_encode %{
19622 int vlen_enc = vector_length_encoding(this);
19623 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19624 %}
19625 ins_pipe( pipe_slow );
19626 %}
19627
// AVX rule that takes the second AddVL input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19628 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19629 predicate((UseAVX > 0) &&
19630 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19631 match(Set dst (AddVL src (LoadVector mem)));
19632 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
19633 ins_encode %{
19634 int vlen_enc = vector_length_encoding(this);
19635 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19636 %}
19637 ins_pipe( pipe_slow );
19638 %}
19639
19640 // Floats vector add
// SSE-only rule (UseAVX == 0): dst is both the first AddVF input and the
// result; addps adds src into dst in place.
19641 instruct vaddF(vec dst, vec src) %{
19642 predicate(UseAVX == 0);
19643 match(Set dst (AddVF dst src));
19644 format %{ "addps $dst,$src\t! add packedF" %}
19645 ins_encode %{
19646 __ addps($dst$$XMMRegister, $src$$XMMRegister);
19647 %}
19648 ins_pipe( pipe_slow );
19649 %}
19650
// AVX rule (UseAVX > 0): three-operand vaddps, dst = src1 + src2 per float
// lane, encoded at the width given by vector_length_encoding(this).
19651 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19652 predicate(UseAVX > 0);
19653 match(Set dst (AddVF src1 src2));
19654 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
19655 ins_encode %{
19656 int vlen_enc = vector_length_encoding(this);
19657 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19658 %}
19659 ins_pipe( pipe_slow );
19660 %}
19661
// AVX rule that takes the second AddVF input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19662 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19663 predicate((UseAVX > 0) &&
19664 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19665 match(Set dst (AddVF src (LoadVector mem)));
19666 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
19667 ins_encode %{
19668 int vlen_enc = vector_length_encoding(this);
19669 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19670 %}
19671 ins_pipe( pipe_slow );
19672 %}
19673
19674 // Doubles vector add
// SSE-only rule (UseAVX == 0): dst is both the first AddVD input and the
// result; addpd adds src into dst in place.
19675 instruct vaddD(vec dst, vec src) %{
19676 predicate(UseAVX == 0);
19677 match(Set dst (AddVD dst src));
19678 format %{ "addpd $dst,$src\t! add packedD" %}
19679 ins_encode %{
19680 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
19681 %}
19682 ins_pipe( pipe_slow );
19683 %}
19684
// AVX rule (UseAVX > 0): three-operand vaddpd, dst = src1 + src2 per double
// lane, encoded at the width given by vector_length_encoding(this).
19685 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
19686 predicate(UseAVX > 0);
19687 match(Set dst (AddVD src1 src2));
19688 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
19689 ins_encode %{
19690 int vlen_enc = vector_length_encoding(this);
19691 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19692 %}
19693 ins_pipe( pipe_slow );
19694 %}
19695
// AVX rule that takes the second AddVD input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19696 instruct vaddD_mem(vec dst, vec src, memory mem) %{
19697 predicate((UseAVX > 0) &&
19698 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19699 match(Set dst (AddVD src (LoadVector mem)));
19700 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
19701 ins_encode %{
19702 int vlen_enc = vector_length_encoding(this);
19703 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19704 %}
19705 ins_pipe( pipe_slow );
19706 %}
19707
19708 // --------------------------------- SUB --------------------------------------
19709
19710 // Bytes vector sub
// SSE-only rule (UseAVX == 0): dst is both the SubVB minuend and the result;
// psubb subtracts src from dst in place.
19711 instruct vsubB(vec dst, vec src) %{
19712 predicate(UseAVX == 0);
19713 match(Set dst (SubVB dst src));
19714 format %{ "psubb $dst,$src\t! sub packedB" %}
19715 ins_encode %{
19716 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
19717 %}
19718 ins_pipe( pipe_slow );
19719 %}
19720
// AVX rule (UseAVX > 0): three-operand vpsubb, dst = src1 - src2 per byte
// lane, encoded at the width given by vector_length_encoding(this).
19721 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
19722 predicate(UseAVX > 0);
19723 match(Set dst (SubVB src1 src2));
19724 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
19725 ins_encode %{
19726 int vlen_enc = vector_length_encoding(this);
19727 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19728 %}
19729 ins_pipe( pipe_slow );
19730 %}
19731
// AVX rule that takes the SubVB subtrahend directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19732 instruct vsubB_mem(vec dst, vec src, memory mem) %{
19733 predicate((UseAVX > 0) &&
19734 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19735 match(Set dst (SubVB src (LoadVector mem)));
19736 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
19737 ins_encode %{
19738 int vlen_enc = vector_length_encoding(this);
19739 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19740 %}
19741 ins_pipe( pipe_slow );
19742 %}
19743
19744 // Shorts/Chars vector sub
// SSE-only rule (UseAVX == 0): dst is both the SubVS minuend and the result;
// psubw subtracts src from dst in place.
19745 instruct vsubS(vec dst, vec src) %{
19746 predicate(UseAVX == 0);
19747 match(Set dst (SubVS dst src));
19748 format %{ "psubw $dst,$src\t! sub packedS" %}
19749 ins_encode %{
19750 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
19751 %}
19752 ins_pipe( pipe_slow );
19753 %}
19754
19755
// AVX rule (UseAVX > 0): three-operand vpsubw, dst = src1 - src2 per 16-bit
// lane, encoded at the width given by vector_length_encoding(this).
19756 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
19757 predicate(UseAVX > 0);
19758 match(Set dst (SubVS src1 src2));
19759 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
19760 ins_encode %{
19761 int vlen_enc = vector_length_encoding(this);
19762 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19763 %}
19764 ins_pipe( pipe_slow );
19765 %}
19766
// AVX rule that takes the SubVS subtrahend directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19767 instruct vsubS_mem(vec dst, vec src, memory mem) %{
19768 predicate((UseAVX > 0) &&
19769 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19770 match(Set dst (SubVS src (LoadVector mem)));
19771 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
19772 ins_encode %{
19773 int vlen_enc = vector_length_encoding(this);
19774 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19775 %}
19776 ins_pipe( pipe_slow );
19777 %}
19778
19779 // Integers vector sub
// SSE-only rule (UseAVX == 0): dst is both the SubVI minuend and the result;
// psubd subtracts src from dst in place.
19780 instruct vsubI(vec dst, vec src) %{
19781 predicate(UseAVX == 0);
19782 match(Set dst (SubVI dst src));
19783 format %{ "psubd $dst,$src\t! sub packedI" %}
19784 ins_encode %{
19785 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
19786 %}
19787 ins_pipe( pipe_slow );
19788 %}
19789
// AVX rule (UseAVX > 0): three-operand vpsubd, dst = src1 - src2 per 32-bit
// lane, encoded at the width given by vector_length_encoding(this).
19790 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
19791 predicate(UseAVX > 0);
19792 match(Set dst (SubVI src1 src2));
19793 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
19794 ins_encode %{
19795 int vlen_enc = vector_length_encoding(this);
19796 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19797 %}
19798 ins_pipe( pipe_slow );
19799 %}
19800
// AVX rule that takes the SubVI subtrahend directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19801 instruct vsubI_mem(vec dst, vec src, memory mem) %{
19802 predicate((UseAVX > 0) &&
19803 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19804 match(Set dst (SubVI src (LoadVector mem)));
19805 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
19806 ins_encode %{
19807 int vlen_enc = vector_length_encoding(this);
19808 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19809 %}
19810 ins_pipe( pipe_slow );
19811 %}
19812
19813 // Longs vector sub
// SSE-only rule (UseAVX == 0): dst is both the SubVL minuend and the result;
// psubq subtracts src from dst in place.
19814 instruct vsubL(vec dst, vec src) %{
19815 predicate(UseAVX == 0);
19816 match(Set dst (SubVL dst src));
19817 format %{ "psubq $dst,$src\t! sub packedL" %}
19818 ins_encode %{
19819 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
19820 %}
19821 ins_pipe( pipe_slow );
19822 %}
19823
// AVX rule (UseAVX > 0): three-operand vpsubq, dst = src1 - src2 per 64-bit
// lane, encoded at the width given by vector_length_encoding(this).
19824 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
19825 predicate(UseAVX > 0);
19826 match(Set dst (SubVL src1 src2));
19827 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
19828 ins_encode %{
19829 int vlen_enc = vector_length_encoding(this);
19830 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19831 %}
19832 ins_pipe( pipe_slow );
19833 %}
19834
19835
// AVX rule that takes the SubVL subtrahend directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19836 instruct vsubL_mem(vec dst, vec src, memory mem) %{
19837 predicate((UseAVX > 0) &&
19838 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19839 match(Set dst (SubVL src (LoadVector mem)));
19840 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
19841 ins_encode %{
19842 int vlen_enc = vector_length_encoding(this);
19843 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19844 %}
19845 ins_pipe( pipe_slow );
19846 %}
19847
19848 // Floats vector sub
// SSE-only rule (UseAVX == 0): dst is both the SubVF minuend and the result;
// subps subtracts src from dst in place.
19849 instruct vsubF(vec dst, vec src) %{
19850 predicate(UseAVX == 0);
19851 match(Set dst (SubVF dst src));
19852 format %{ "subps $dst,$src\t! sub packedF" %}
19853 ins_encode %{
19854 __ subps($dst$$XMMRegister, $src$$XMMRegister);
19855 %}
19856 ins_pipe( pipe_slow );
19857 %}
19858
// AVX rule (UseAVX > 0): three-operand vsubps, dst = src1 - src2 per float
// lane, encoded at the width given by vector_length_encoding(this).
19859 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
19860 predicate(UseAVX > 0);
19861 match(Set dst (SubVF src1 src2));
19862 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
19863 ins_encode %{
19864 int vlen_enc = vector_length_encoding(this);
19865 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19866 %}
19867 ins_pipe( pipe_slow );
19868 %}
19869
// AVX rule that takes the SubVF subtrahend directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19870 instruct vsubF_mem(vec dst, vec src, memory mem) %{
19871 predicate((UseAVX > 0) &&
19872 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19873 match(Set dst (SubVF src (LoadVector mem)));
19874 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
19875 ins_encode %{
19876 int vlen_enc = vector_length_encoding(this);
19877 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19878 %}
19879 ins_pipe( pipe_slow );
19880 %}
19881
19882 // Doubles vector sub
// SSE-only rule (UseAVX == 0): dst is both the SubVD minuend and the result;
// subpd subtracts src from dst in place.
19883 instruct vsubD(vec dst, vec src) %{
19884 predicate(UseAVX == 0);
19885 match(Set dst (SubVD dst src));
19886 format %{ "subpd $dst,$src\t! sub packedD" %}
19887 ins_encode %{
19888 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
19889 %}
19890 ins_pipe( pipe_slow );
19891 %}
19892
// AVX rule (UseAVX > 0): three-operand vsubpd, dst = src1 - src2 per double
// lane, encoded at the width given by vector_length_encoding(this).
19893 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
19894 predicate(UseAVX > 0);
19895 match(Set dst (SubVD src1 src2));
19896 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
19897 ins_encode %{
19898 int vlen_enc = vector_length_encoding(this);
19899 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19900 %}
19901 ins_pipe( pipe_slow );
19902 %}
19903
// AVX rule that takes the SubVD subtrahend directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
19904 instruct vsubD_mem(vec dst, vec src, memory mem) %{
19905 predicate((UseAVX > 0) &&
19906 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19907 match(Set dst (SubVD src (LoadVector mem)));
19908 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
19909 ins_encode %{
19910 int vlen_enc = vector_length_encoding(this);
19911 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19912 %}
19913 ins_pipe( pipe_slow );
19914 %}
19915
19916 // --------------------------------- MUL --------------------------------------
19917
19918 // Byte vector mul
// MulVB for vectors of at most 8 bytes. There is no packed byte multiply in
// the sequence below, so the bytes are widened to 16-bit words, multiplied
// with pmullw, and narrowed back. dst and xtmp are declared TEMP, so the
// allocator keeps them distinct from src1/src2. Asserts UseSSE > 3.
19919 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
19920 predicate(Matcher::vector_length_in_bytes(n) <= 8);
19921 match(Set dst (MulVB src1 src2));
19922 effect(TEMP dst, TEMP xtmp);
19923 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
19924 ins_encode %{
19925 assert(UseSSE > 3, "required");
// Sign-extend each byte of both inputs to a 16-bit word.
19926 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
19927 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
// 16-bit multiply; the low byte of each word is the wrapped byte product.
19928 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
// Clear the high byte of every word (shift left, then logical shift right).
19929 __ psllw($dst$$XMMRegister, 8);
19930 __ psrlw($dst$$XMMRegister, 8);
// Pack the words back to bytes; every word is now <= 0xFF, so the unsigned
// saturation performed by packuswb cannot alter the values.
19931 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
19932 %}
19933 ins_pipe( pipe_slow );
19934 %}
19935
// SSE MulVB (UseAVX == 0) for vectors longer than 8 bytes. The odd and even
// bytes of each 16-bit word are multiplied separately with pmullw and the two
// partial results are OR-ed together. dst and xtmp are TEMP, so neither
// aliases src1/src2, which are re-read after dst has been written.
19936 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
19937 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
19938 match(Set dst (MulVB src1 src2));
19939 effect(TEMP dst, TEMP xtmp);
19940 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
19941 ins_encode %{
19942 assert(UseSSE > 3, "required");
19943 // Odd-index elements
// Shift the high byte of each word down, multiply, then shift the low byte
// of the product back up into the high-byte position.
19944 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
19945 __ psrlw($dst$$XMMRegister, 8);
19946 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
19947 __ psrlw($xtmp$$XMMRegister, 8);
19948 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
19949 __ psllw($dst$$XMMRegister, 8);
19950 // Even-index elements
// The low byte of a word product depends only on the low bytes of the
// inputs, so multiply the words as-is and then clear the high byte.
19951 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
19952 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
19953 __ psllw($xtmp$$XMMRegister, 8);
19954 __ psrlw($xtmp$$XMMRegister, 8);
19955 // Combine
19956 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
19957 %}
19958 ins_pipe( pipe_slow );
19959 %}
19960
// AVX MulVB (UseAVX > 0) for vectors longer than 8 bytes. Same odd/even split
// as the SSE rule, using three-operand forms: xtmp2 accumulates the odd-byte
// products, xtmp1 the even-byte products, and dst is written only by the
// final vpor. Only xtmp1/xtmp2 are declared TEMP.
19961 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
19962 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
19963 match(Set dst (MulVB src1 src2));
19964 effect(TEMP xtmp1, TEMP xtmp2);
19965 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
19966 ins_encode %{
19967 int vlen_enc = vector_length_encoding(this);
19968 // Odd-index elements
// Shift the high byte of each word down in both inputs, multiply, then move
// the low byte of the product back into the high-byte position.
19969 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
19970 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
19971 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
19972 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
19973 // Even-index elements
// Multiply the words as-is and keep only the low byte of each product.
19974 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19975 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
19976 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
19977 // Combine
19978 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
19979 %}
19980 ins_pipe( pipe_slow );
19981 %}
19982
19983 // Shorts/Chars vector mul
// SSE-only rule (UseAVX == 0): dst is both the first MulVS input and the
// result; pmullw multiplies dst by src in place.
19984 instruct vmulS(vec dst, vec src) %{
19985 predicate(UseAVX == 0);
19986 match(Set dst (MulVS dst src));
19987 format %{ "pmullw $dst,$src\t! mul packedS" %}
19988 ins_encode %{
19989 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
19990 %}
19991 ins_pipe( pipe_slow );
19992 %}
19993
// AVX rule (UseAVX > 0): three-operand vpmullw, dst = src1 * src2 per 16-bit
// lane, encoded at the width given by vector_length_encoding(this).
19994 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
19995 predicate(UseAVX > 0);
19996 match(Set dst (MulVS src1 src2));
19997 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
19998 ins_encode %{
19999 int vlen_enc = vector_length_encoding(this);
20000 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20001 %}
20002 ins_pipe( pipe_slow );
20003 %}
20004
// AVX rule that takes the second MulVS input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
20005 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20006 predicate((UseAVX > 0) &&
20007 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20008 match(Set dst (MulVS src (LoadVector mem)));
20009 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20010 ins_encode %{
20011 int vlen_enc = vector_length_encoding(this);
20012 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20013 %}
20014 ins_pipe( pipe_slow );
20015 %}
20016
20017 // Integers vector mul
// SSE-only rule (UseAVX == 0): dst is both the first MulVI input and the
// result; pmulld multiplies dst by src in place. Asserts UseSSE > 3.
20018 instruct vmulI(vec dst, vec src) %{
20019 predicate(UseAVX == 0);
20020 match(Set dst (MulVI dst src));
20021 format %{ "pmulld $dst,$src\t! mul packedI" %}
20022 ins_encode %{
20023 assert(UseSSE > 3, "required");
20024 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20025 %}
20026 ins_pipe( pipe_slow );
20027 %}
20028
// AVX rule (UseAVX > 0): three-operand vpmulld, dst = src1 * src2 per 32-bit
// lane, encoded at the width given by vector_length_encoding(this).
20029 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20030 predicate(UseAVX > 0);
20031 match(Set dst (MulVI src1 src2));
20032 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20033 ins_encode %{
20034 int vlen_enc = vector_length_encoding(this);
20035 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20036 %}
20037 ins_pipe( pipe_slow );
20038 %}
20039
// AVX rule that takes the second MulVI input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
20040 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20041 predicate((UseAVX > 0) &&
20042 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20043 match(Set dst (MulVI src (LoadVector mem)));
20044 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20045 ins_encode %{
20046 int vlen_enc = vector_length_encoding(this);
20047 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20048 %}
20049 ins_pipe( pipe_slow );
20050 %}
20051
20052 // Longs vector mul
// EVEX MulVL using a single evpmullq. Selected either for 64-byte vectors
// when VM_Version::supports_avx512dq() holds, or for any vector length when
// VM_Version::supports_avx512vldq() holds. Asserts UseAVX > 2; ins_cost(500).
20053 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20054 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20055 VM_Version::supports_avx512dq()) ||
20056 VM_Version::supports_avx512vldq());
20057 match(Set dst (MulVL src1 src2));
20058 ins_cost(500);
20059 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20060 ins_encode %{
20061 assert(UseAVX > 2, "required");
20062 int vlen_enc = vector_length_encoding(this);
20063 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20064 %}
20065 ins_pipe( pipe_slow );
20066 %}
20067
// EVEX MulVL with the second input folded in from memory (LoadVector mem).
// Selected for 64-byte vectors with avx512dq, or for vectors longer than
// 8 bytes with avx512vldq. Unlike the other *_mem rules in this section the
// length check is on n itself rather than n->in(1). Asserts UseAVX > 2.
20068 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20069 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20070 VM_Version::supports_avx512dq()) ||
20071 (Matcher::vector_length_in_bytes(n) > 8 &&
20072 VM_Version::supports_avx512vldq()));
20073 match(Set dst (MulVL src (LoadVector mem)));
20074 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20075 ins_cost(500);
20076 ins_encode %{
20077 assert(UseAVX > 2, "required");
20078 int vlen_enc = vector_length_encoding(this);
20079 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20080 %}
20081 ins_pipe( pipe_slow );
20082 %}
20083
// SSE MulVL (UseAVX == 0), built from 32-bit pieces. Writing each 64-bit lane
// as hi:lo, the low 64 bits of the product are
//   lo1*lo2 + ((lo1*hi2 + hi1*lo2) << 32).
// dst and xtmp are TEMP, so they do not alias src1/src2, which are read
// again after dst has been written. Asserts VM_Version::supports_sse4_1().
20084 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20085 predicate(UseAVX == 0);
20086 match(Set dst (MulVL src1 src2));
20087 ins_cost(500);
20088 effect(TEMP dst, TEMP xtmp);
20089 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20090 ins_encode %{
20091 assert(VM_Version::supports_sse4_1(), "required");
20092 // Get the lo-hi cross products; only their lower 32 bits are needed
// pshufd 0xB1 swaps the two 32-bit halves of every 64-bit lane, so pmulld
// yields lo1*hi2 and hi1*lo2; a second swap plus paddd sums them, and psllq
// moves that sum into the upper half of the lane.
20093 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20094 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20095 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20096 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20097 __ psllq($dst$$XMMRegister, 32);
20098 // Get the lo-lo products
// pmuludq forms the full 64-bit unsigned product of the low halves.
20099 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20100 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20101 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20102 %}
20103 ins_pipe( pipe_slow );
20104 %}
20105
// AVX MulVL built from 32-bit pieces. Selected when UseAVX > 0 and either the
// vector is 64 bytes without avx512dq, or it is shorter than 64 bytes without
// avx512vldq. Writing each 64-bit lane as hi:lo, the result is
//   lo1*lo2 + ((lo1*hi2 + hi1*lo2) << 32).
// xtmp1/xtmp2 are TEMP; dst is written only by the final vpaddq.
20106 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20107 predicate(UseAVX > 0 &&
20108 ((Matcher::vector_length_in_bytes(n) == 64 &&
20109 !VM_Version::supports_avx512dq()) ||
20110 (Matcher::vector_length_in_bytes(n) < 64 &&
20111 !VM_Version::supports_avx512vldq())));
20112 match(Set dst (MulVL src1 src2));
20113 effect(TEMP xtmp1, TEMP xtmp2);
20114 ins_cost(500);
20115 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20116 ins_encode %{
20117 int vlen_enc = vector_length_encoding(this);
20118 // Get the lo-hi cross products; only their lower 32 bits are needed
// vpshufd 0xB1 swaps the 32-bit halves of every 64-bit lane, so vpmulld
// yields lo1*hi2 and hi1*lo2; a second swap plus vpaddd sums them, and
// vpsllq moves that sum into the upper half of the lane (in xtmp2).
20119 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20120 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20121 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20122 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20123 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20124 // Get the lo-lo products
// vpmuludq forms the full 64-bit unsigned product of the low halves.
20125 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20126 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20127 %}
20128 ins_pipe( pipe_slow );
20129 %}
20130
// MulVL lowered to a single vpmuludq. Selected when UseAVX > 0 and the MulVL
// node reports has_uint_inputs(). Its ins_cost(100) is lower than the
// ins_cost(500) carried by the general MulVL rules in this section.
// NOTE(review): presumably has_uint_inputs() means both inputs are known to
// be zero-extended 32-bit values - confirm against MulVLNode.
20131 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20132 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20133 match(Set dst (MulVL src1 src2));
20134 ins_cost(100);
20135 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20136 ins_encode %{
20137 int vlen_enc = vector_length_encoding(this);
20138 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20139 %}
20140 ins_pipe( pipe_slow );
20141 %}
20142
// MulVL lowered to a single vpmuldq. Selected when UseAVX > 0 and the MulVL
// node reports has_int_inputs(). Its ins_cost(100) is lower than the
// ins_cost(500) carried by the general MulVL rules in this section.
// NOTE(review): presumably has_int_inputs() means both inputs are known to
// be sign-extended 32-bit values - confirm against MulVLNode.
20143 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20144 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20145 match(Set dst (MulVL src1 src2));
20146 ins_cost(100);
20147 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20148 ins_encode %{
20149 int vlen_enc = vector_length_encoding(this);
20150 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20151 %}
20152 ins_pipe( pipe_slow );
20153 %}
20154
20155 // Floats vector mul
// SSE-only rule (UseAVX == 0): dst is both the first MulVF input and the
// result; mulps multiplies dst by src in place.
20156 instruct vmulF(vec dst, vec src) %{
20157 predicate(UseAVX == 0);
20158 match(Set dst (MulVF dst src));
20159 format %{ "mulps $dst,$src\t! mul packedF" %}
20160 ins_encode %{
20161 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20162 %}
20163 ins_pipe( pipe_slow );
20164 %}
20165
// AVX rule (UseAVX > 0): three-operand vmulps, dst = src1 * src2 per float
// lane, encoded at the width given by vector_length_encoding(this).
20166 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20167 predicate(UseAVX > 0);
20168 match(Set dst (MulVF src1 src2));
20169 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20170 ins_encode %{
20171 int vlen_enc = vector_length_encoding(this);
20172 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20173 %}
20174 ins_pipe( pipe_slow );
20175 %}
20176
// AVX rule that takes the second MulVF input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
20177 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20178 predicate((UseAVX > 0) &&
20179 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20180 match(Set dst (MulVF src (LoadVector mem)));
20181 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20182 ins_encode %{
20183 int vlen_enc = vector_length_encoding(this);
20184 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20185 %}
20186 ins_pipe( pipe_slow );
20187 %}
20188
20189 // Doubles vector mul
// SSE-only rule (UseAVX == 0): dst is both the first MulVD input and the
// result; mulpd multiplies dst by src in place.
20190 instruct vmulD(vec dst, vec src) %{
20191 predicate(UseAVX == 0);
20192 match(Set dst (MulVD dst src));
20193 format %{ "mulpd $dst,$src\t! mul packedD" %}
20194 ins_encode %{
20195 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20196 %}
20197 ins_pipe( pipe_slow );
20198 %}
20199
// AVX rule (UseAVX > 0): three-operand vmulpd, dst = src1 * src2 per double
// lane, encoded at the width given by vector_length_encoding(this).
20200 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20201 predicate(UseAVX > 0);
20202 match(Set dst (MulVD src1 src2));
20203 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20204 ins_encode %{
20205 int vlen_enc = vector_length_encoding(this);
20206 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20207 %}
20208 ins_pipe( pipe_slow );
20209 %}
20210
// AVX rule that takes the second MulVD input directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
20211 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20212 predicate((UseAVX > 0) &&
20213 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20214 match(Set dst (MulVD src (LoadVector mem)));
20215 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20216 ins_encode %{
20217 int vlen_enc = vector_length_encoding(this);
20218 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20219 %}
20220 ins_pipe( pipe_slow );
20221 %}
20222
20223 // --------------------------------- DIV --------------------------------------
20224
20225 // Floats vector div
// SSE-only rule (UseAVX == 0): dst is both the DivVF dividend and the result;
// divps divides dst by src in place.
20226 instruct vdivF(vec dst, vec src) %{
20227 predicate(UseAVX == 0);
20228 match(Set dst (DivVF dst src));
20229 format %{ "divps $dst,$src\t! div packedF" %}
20230 ins_encode %{
20231 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20232 %}
20233 ins_pipe( pipe_slow );
20234 %}
20235
// AVX rule (UseAVX > 0): three-operand vdivps, dst = src1 / src2 per float
// lane, encoded at the width given by vector_length_encoding(this).
20236 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20237 predicate(UseAVX > 0);
20238 match(Set dst (DivVF src1 src2));
20239 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20240 ins_encode %{
20241 int vlen_enc = vector_length_encoding(this);
20242 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20243 %}
20244 ins_pipe( pipe_slow );
20245 %}
20246
// AVX rule that takes the DivVF divisor directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
20247 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20248 predicate((UseAVX > 0) &&
20249 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20250 match(Set dst (DivVF src (LoadVector mem)));
20251 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20252 ins_encode %{
20253 int vlen_enc = vector_length_encoding(this);
20254 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20255 %}
20256 ins_pipe( pipe_slow );
20257 %}
20258
20259 // Doubles vector div
// SSE-only rule (UseAVX == 0): dst is both the DivVD dividend and the result;
// divpd divides dst by src in place.
20260 instruct vdivD(vec dst, vec src) %{
20261 predicate(UseAVX == 0);
20262 match(Set dst (DivVD dst src));
20263 format %{ "divpd $dst,$src\t! div packedD" %}
20264 ins_encode %{
20265 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20266 %}
20267 ins_pipe( pipe_slow );
20268 %}
20269
// AVX rule (UseAVX > 0): three-operand vdivpd, dst = src1 / src2 per double
// lane, encoded at the width given by vector_length_encoding(this).
20270 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20271 predicate(UseAVX > 0);
20272 match(Set dst (DivVD src1 src2));
20273 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20274 ins_encode %{
20275 int vlen_enc = vector_length_encoding(this);
20276 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20277 %}
20278 ins_pipe( pipe_slow );
20279 %}
20280
// AVX rule that takes the DivVD divisor directly from memory
// (LoadVector mem). Requires UseAVX > 0 and
// Matcher::vector_length_in_bytes(n->in(1)) > 8.
20281 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20282 predicate((UseAVX > 0) &&
20283 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20284 match(Set dst (DivVD src (LoadVector mem)));
20285 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20286 ins_encode %{
20287 int vlen_enc = vector_length_encoding(this);
20288 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20289 %}
20290 ins_pipe( pipe_slow );
20291 %}
20292
20293 // ------------------------------ MinMax ---------------------------------------
20294
20295 // Byte, Short, Int vector Min/Max
// SSE (UseAVX == 0) MinV/MaxV for integral element types other than T_LONG.
// One rule serves both ideal opcodes: the opcode and element type are read at
// emit time and handed to the pminmax helper. dst is both the first input and
// the result. Asserts UseSSE >= 4.
20296 instruct minmax_reg_sse(vec dst, vec src) %{
20297 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20298 UseAVX == 0);
20299 match(Set dst (MinV dst src));
20300 match(Set dst (MaxV dst src));
20301 format %{ "vector_minmax $dst,$src\t! " %}
20302 ins_encode %{
20303 assert(UseSSE >= 4, "required");
20304
20305 int opcode = this->ideal_Opcode();
20306 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20307 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20308 %}
20309 ins_pipe( pipe_slow );
20310 %}
20311
// AVX (UseAVX > 0) MinV/MaxV for integral element types other than T_LONG.
// One rule serves both ideal opcodes: the opcode, element type and vector
// length encoding are read at emit time and handed to the vpminmax helper.
20312 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20313 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20314 UseAVX > 0);
20315 match(Set dst (MinV src1 src2));
20316 match(Set dst (MaxV src1 src2));
20317 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20318 ins_encode %{
20319 int opcode = this->ideal_Opcode();
20320 int vlen_enc = vector_length_encoding(this);
20321 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20322
20323 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20324 %}
20325 ins_pipe( pipe_slow );
20326 %}
20327
20328 // Long vector Min/Max
// SSE (UseAVX == 0) MinV/MaxV for 16-byte T_LONG vectors. The tmp operand is
// of class rxmm0 and is passed to the pminmax helper as scratch; both dst and
// tmp are declared TEMP. Note that the MaxV rule lists its inputs as
// (src dst) while the MinV rule uses (dst src). Asserts UseSSE >= 4.
20329 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20330 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20331 UseAVX == 0);
20332 match(Set dst (MinV dst src));
20333 match(Set dst (MaxV src dst));
20334 effect(TEMP dst, TEMP tmp);
20335 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20336 ins_encode %{
20337 assert(UseSSE >= 4, "required");
20338
20339 int opcode = this->ideal_Opcode();
20340 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20341 assert(elem_bt == T_LONG, "sanity");
20342
20343 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20344 %}
20345 ins_pipe( pipe_slow );
20346 %}
20347
// AVX MinV/MaxV for T_LONG vectors of at most 32 bytes when AVX-512VL is not
// available (UseAVX > 0 && !supports_avx512vl()). Operands use the legVec
// class and dst is declared TEMP; the work is delegated to vpminmax.
20348 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20349 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20350 UseAVX > 0 && !VM_Version::supports_avx512vl());
20351 match(Set dst (MinV src1 src2));
20352 match(Set dst (MaxV src1 src2));
20353 effect(TEMP dst);
20354 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20355 ins_encode %{
20356 int vlen_enc = vector_length_encoding(this);
20357 int opcode = this->ideal_Opcode();
20358 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20359 assert(elem_bt == T_LONG, "sanity");
20360
20361 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20362 %}
20363 ins_pipe( pipe_slow );
20364 %}
20365
// EVEX MinV/MaxV for T_LONG vectors. Selected for 64-byte vectors, or for any
// vector length when VM_Version::supports_avx512vl() holds. One rule serves
// both ideal opcodes; the opcode is read at emit time and handed to vpminmax.
// Asserts UseAVX > 2.
20366 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20367 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20368 Matcher::vector_element_basic_type(n) == T_LONG);
20369 match(Set dst (MinV src1 src2));
20370 match(Set dst (MaxV src1 src2));
// The third operand needs the '$' sigil; without it the listing prints the
// literal text "src2" instead of the allocated register.
20371 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20372 ins_encode %{
20373 assert(UseAVX > 2, "required");
20374
20375 int vlen_enc = vector_length_encoding(this);
20376 int opcode = this->ideal_Opcode();
20377 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20378 assert(elem_bt == T_LONG, "sanity");
20379
20380 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20381 %}
20382 ins_pipe( pipe_slow );
20383 %}
20384
20385 // Float/Double vector Min/Max
// Floating-point (T_FLOAT/T_DOUBLE) MinV/MaxV when
// VM_Version::supports_avx10_2() holds. No temporaries are declared: the
// whole operation is delegated to vminmax_fp_avx10_2, which is passed k0 as
// its mask-register argument.
20386 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20387 predicate(VM_Version::supports_avx10_2() &&
20388 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20389 match(Set dst (MinV a b));
20390 match(Set dst (MaxV a b));
20391 format %{ "vector_minmaxFP $dst, $a, $b" %}
20392 ins_encode %{
20393 int vlen_enc = vector_length_encoding(this);
20394 int opcode = this->ideal_Opcode();
20395 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20396 __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20397 %}
20398 ins_pipe( pipe_slow );
20399 %}
20400
20401 // Float/Double vector Min/Max
// AVX floating-point MinV/MaxV for vectors of at most 32 bytes when AVX10.2
// is not available. Operands use the legVec class; a and b are declared USE
// and tmp/atmp/btmp are TEMP scratch registers handed to vminmax_fp.
20402 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20403 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20404 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20405 UseAVX > 0);
20406 match(Set dst (MinV a b));
20407 match(Set dst (MaxV a b));
20408 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20409 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20410 ins_encode %{
20411 assert(UseAVX > 0, "required");
20412
20413 int opcode = this->ideal_Opcode();
20414 int vlen_enc = vector_length_encoding(this);
20415 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20416
20417 __ vminmax_fp(opcode, elem_bt,
20418 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20419 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20420 %}
20421 ins_pipe( pipe_slow );
20422 %}
20423
// Floating-point (T_FLOAT / T_DOUBLE) vector Min/Max for 64-byte vectors on
// CPUs without AVX10.2. Uses two vector temporaries and one opmask temporary;
// dst is also declared TEMP.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20444
20445 // ------------------------------ Unsigned vector Min/Max ----------------------
20446
// Unsigned vector Min/Max, register-register form.
// Applies to every element type when AVX512VL is available, and to all
// non-long element types otherwise.
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "");
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    __ vpuminmax(ideal_opc, bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20461
// Unsigned vector Min/Max with the second operand folded from a LoadVector.
// Same applicability as the register-register form: any element type with
// AVX512VL, non-long element types otherwise.
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "");
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    __ vpuminmax(ideal_opc, bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20476
// Unsigned long vector Min/Max for CPUs without AVX512VL.
// Complements the generic unsigned rules, whose predicate excludes T_LONG
// when AVX512VL is missing; this variant needs two vector temporaries.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20490
// Masked unsigned vector Min/Max, register form.
// dst is both the first source and the destination of the operation, which
// is performed under the opmask register.
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, dst_reg,
                   dst_reg, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20504
// Masked unsigned vector Min/Max with the second source folded from a
// LoadVector. dst is both the first source and the destination.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, dst_reg,
                   dst_reg, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20518
20519 // --------------------------------- Signum/CopySign ---------------------------
20520
// Scalar float signum, computed in place in dst.
// The constants 0.0 and 1.0 arrive as register operands; the flags register
// is declared killed.
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister value = $dst$$XMMRegister;
    __ signum_fp(ideal_opc, value, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20531
// Scalar double signum, computed in place in dst.
// The constants 0.0 and 1.0 arrive as register operands; the flags register
// is declared killed.
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister value = $dst$$XMMRegister;
    __ signum_fp(ideal_opc, value, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20542
// Vector float/double signum for vectors of at most 32 bytes on CPUs without
// AVX512VL. Needs one vector temporary; dst is also declared TEMP.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister zero_reg = $zero$$XMMRegister;
    const XMMRegister one_reg = $one$$XMMRegister;
    __ vector_signum_avx(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister, zero_reg, one_reg,
                         $xtmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20557
// Vector float/double signum for CPUs with AVX512VL, or for 64-byte vectors.
// Needs one opmask temporary; dst is also declared TEMP.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister zero_reg = $zero$$XMMRegister;
    const XMMRegister one_reg = $one$$XMMRegister;
    __ vector_signum_evex(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister, zero_reg, one_reg,
                          $ktmp1$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20572
20573 // ---------------------------------------
// For copySign use 0xE4 as the truth-table immediate (imm8) for vpternlog
20575 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20576 // C (xmm2) is set to 0x7FFFFFFF
20577 // Wherever xmm2 is 0, we want to pick from B (sign)
20578 // Wherever xmm2 is 1, we want to pick from A (src)
20579 //
20580 // A B C Result
20581 // 0 0 0 0
20582 // 0 0 1 0
20583 // 0 1 0 1
20584 // 0 1 1 0
20585 // 1 0 0 0
20586 // 1 0 1 1
20587 // 1 1 0 1
20588 // 1 1 1 1
20589 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
20591 // ---------------------------------------
20592
// Scalar float copySign: dst keeps its magnitude bits and takes the sign bit
// of src. A 0x7FFFFFFF selector is built in a GPR, moved to an XMM temp, and
// vpternlogd with immediate 0xE4 then blends dst and src bitwise under it.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    const Register gpr_tmp = $tmp2$$Register;
    const XMMRegister selector = $tmp1$$XMMRegister;
    // All bits set except the sign bit.
    __ movl(gpr_tmp, 0x7FFFFFFF);
    __ movdl(selector, gpr_tmp);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, selector, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
20604
// Scalar double copySign, matching the CopySignD shape that carries an extra
// immD zero operand. Same scheme as the float rule, with a 64-bit
// 0x7FFFFFFFFFFFFFFF selector and vpternlogq.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    const Register gpr_tmp = $tmp2$$Register;
    const XMMRegister selector = $tmp1$$XMMRegister;
    // All bits set except the sign bit.
    __ mov64(gpr_tmp, 0x7FFFFFFFFFFFFFFF);
    __ movq(selector, gpr_tmp);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, selector, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
20617
20618 //----------------------------- CompressBits/ExpandBits ------------------------
20619
// Int CompressBits with the mask in a register: a single pextl.
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    const Register bit_mask = $mask$$Register;
    __ pextl(result, value, bit_mask);
  %}
  ins_pipe( pipe_slow );
%}
20629
// Int ExpandBits with the mask in a register: a single pdepl.
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    const Register bit_mask = $mask$$Register;
    __ pdepl(result, value, bit_mask);
  %}
  ins_pipe( pipe_slow );
%}
20639
// Int CompressBits with the mask folded from a LoadI: pextl with a memory
// operand.
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ pextl(result, value, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
20649
// Int ExpandBits with the mask folded from a LoadI: pdepl with a memory
// operand.
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ pdepl(result, value, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
20659
20660 // --------------------------------- Sqrt --------------------------------------
20661
// Packed float square root, register source. Asserts that AVX is enabled.
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vec_enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    __ vsqrtps(result, input, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20672
// Packed float square root with the source folded from a LoadVector.
// Only used when the loaded vector is wider than 8 bytes.
instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vec_enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    __ vsqrtps(result, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20684
// Packed double square root, register source. Asserts that AVX is enabled.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vec_enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    __ vsqrtpd(result, input, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20696
// Packed double square root with the source folded from a LoadVector.
// Only used when the loaded vector is wider than 8 bytes.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vec_enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    __ vsqrtpd(result, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20708
20709 // ------------------------------ Shift ---------------------------------------
20710
// Materialize a scalar shift count in a vector register.
// One rule covers both LShiftCntV and RShiftCntV because on x86 the count
// vector is identical for left and right shifts (only the lowest bits of the
// xmm register are used for the count).
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    const XMMRegister count_vec = $dst$$XMMRegister;
    const Register count_gpr = $cnt$$Register;
    __ movdl(count_vec, count_gpr);
  %}
  ins_pipe( pipe_slow );
%}
20722
// Byte vector shift by a uniform (non-variable) count, up to 8 lanes.
// The bytes are extended to words (sign-extended unless the operation is
// URShiftVB), shifted as words, masked with vector_short_to_byte_mask() and
// packed back to bytes.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int ideal_opc = this->ideal_Opcode();
    const bool is_signed = (ideal_opc != Op_URShiftVB);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister words = $tmp$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;

    // Widen and shift in the temporary.
    __ vextendbw(is_signed, words, $src$$XMMRegister);
    __ vshiftw(ideal_opc, words, count);
    // Mask the shifted words, then narrow them back to bytes.
    __ movdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(result, words);
    __ packuswb(result, result);
  %}
  ins_pipe( pipe_slow );
%}
20743
// 16-lane byte vector shift by a uniform count for UseAVX <= 1.
// The two 8-byte halves of src are widened to words and shifted separately
// in tmp1 (low half) and tmp2 (high half), then masked and packed into dst.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int ideal_opc = this->ideal_Opcode();
    const bool is_signed = (ideal_opc != Op_URShiftVB);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;
    const XMMRegister lo_words = $tmp1$$XMMRegister;
    const XMMRegister hi_words = $tmp2$$XMMRegister;

    // Low eight bytes.
    __ vextendbw(is_signed, lo_words, input);
    __ vshiftw(ideal_opc, lo_words, count);
    // High eight bytes: pshufd with 0xE brings the upper 64 bits down first.
    __ pshufd(hi_words, input, 0xE);
    __ vextendbw(is_signed, hi_words, hi_words);
    __ vshiftw(ideal_opc, hi_words, count);
    // Mask both word vectors and pack them into the 16-byte result.
    __ movdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(hi_words, result);
    __ pand(result, lo_words);
    __ packuswb(result, hi_words);
  %}
  ins_pipe( pipe_slow );
%}
20768
// 16-lane byte vector shift by a uniform count for UseAVX > 1.
// All 16 bytes are widened into one 256-bit word vector, shifted and masked
// there, and the two 128-bit halves are then packed back into dst.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const bool is_signed = (ideal_opc != Op_URShiftVB);
    const int wide_enc = Assembler::AVX_256bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister words = $tmp$$XMMRegister;

    __ vextendbw(is_signed, words, $src$$XMMRegister, wide_enc);
    __ vshiftw(ideal_opc, words, words, $shift$$XMMRegister, wide_enc);
    __ vpand(words, words, ExternalAddress(vector_short_to_byte_mask()), wide_enc, noreg);
    // Upper 128 bits go to the result register, then both halves are packed.
    __ vextracti128_high(result, words);
    __ vpackuswb(result, words, result, 0);
  %}
  ins_pipe( pipe_slow );
%}
20789
// 32-lane byte vector shift by a uniform count (asserts UseAVX > 1).
// The upper and lower 16 bytes are each widened to a 256-bit word vector,
// shifted and masked; the pack result is then rearranged with vpermq 0xD8.
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    const int ideal_opc = this->ideal_Opcode();
    const bool is_signed = (ideal_opc != Op_URShiftVB);
    const int wide_enc = Assembler::AVX_256bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;
    const XMMRegister hi_words = $tmp$$XMMRegister;

    // Widen: the upper 16 bytes into the temporary, the lower 16 into dst.
    __ vextracti128_high(hi_words, input);
    __ vextendbw(is_signed, hi_words, hi_words, wide_enc);
    __ vextendbw(is_signed, result, input, wide_enc);
    // Shift both word vectors.
    __ vshiftw(ideal_opc, hi_words, hi_words, count, wide_enc);
    __ vshiftw(ideal_opc, result, result, count, wide_enc);
    // Mask, pack, and rearrange the packed quadwords.
    __ vpand(hi_words, hi_words, ExternalAddress(vector_short_to_byte_mask()), wide_enc, noreg);
    __ vpand(result, result, ExternalAddress(vector_short_to_byte_mask()), wide_enc, noreg);
    __ vpackuswb(result, result, hi_words, wide_enc);
    __ vpermq(result, result, 0xD8, wide_enc);
  %}
  ins_pipe( pipe_slow );
%}
20814
// 64-lane byte vector shift by a uniform count (asserts UseAVX > 2).
// The upper and lower 32 bytes are each widened to a 512-bit word vector,
// shifted, masked with a broadcast of the short-to-byte mask, packed, and
// finally permuted using the vector_byte_perm_mask() table.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int ideal_opc = this->ideal_Opcode();
    const bool is_signed = (ideal_opc != Op_URShiftVB);
    const int wide_enc = Assembler::AVX_512bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;
    const XMMRegister hi_words = $tmp1$$XMMRegister;
    const XMMRegister lo_words = $tmp2$$XMMRegister;

    // Widen: the upper 32 bytes into tmp1, the lower 32 into tmp2.
    __ vextracti64x4(hi_words, input, 1);
    __ vextendbw(is_signed, hi_words, hi_words, wide_enc);
    __ vextendbw(is_signed, lo_words, input, wide_enc);
    // Shift both word vectors.
    __ vshiftw(ideal_opc, hi_words, hi_words, count, wide_enc);
    __ vshiftw(ideal_opc, lo_words, lo_words, count, wide_enc);
    // Load the mask into dst and broadcast it across the full register.
    __ vmovdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd(result, result, wide_enc);
    __ vpand(hi_words, hi_words, result, wide_enc);
    __ vpand(lo_words, lo_words, result, wide_enc);
    __ vpackuswb(result, hi_words, lo_words, wide_enc);
    // tmp2 is free again; reuse it for the permutation indices.
    __ evmovdquq(lo_words, ExternalAddress(vector_byte_perm_mask()), wide_enc, noreg);
    __ vpermq(result, lo_words, result, wide_enc);
  %}
  ins_pipe( pipe_slow );
%}
20842
// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector shift by a uniform (non-variable) count; this rule
// matches left, arithmetic right and logical right shifts.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form.
      const int vec_enc = vector_length_encoding(this);
      __ vshiftw(ideal_opc, result, input, count, vec_enc);
    } else {
      // Two-operand form: copy the source with a move sized to the vector,
      // then shift the copy in place.
      const int lanes = Matcher::vector_length(this);
      switch (lanes) {
        case 2:
          __ movflt(result, input);
          break;
        case 4:
          __ movdbl(result, input);
          break;
        default:
          assert (lanes == 8, "sanity");
          __ movdqu(result, input);
          break;
      }
      __ vshiftw(ideal_opc, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
20877
// Integers vector shift by a uniform (non-variable) count held in a vector
// register; this rule matches left, arithmetic right and logical right shifts.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form.
      const int vec_enc = vector_length_encoding(this);
      __ vshiftd(ideal_opc, result, input, count, vec_enc);
    } else {
      // Two-operand form: copy the source with a move sized to the vector,
      // then shift the copy in place.
      const int lanes = Matcher::vector_length(this);
      switch (lanes) {
        case 2:
          __ movdbl(result, input);
          break;
        default:
          assert(lanes == 4, "sanity");
          __ movdqu(result, input);
          break;
      }
      __ vshiftd(ideal_opc, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
20905
// Integers vector shift by an 8-bit immediate count; this rule matches left,
// arithmetic right and logical right shifts whose count is a constant
// wrapped in LShiftCntV / RShiftCntV.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form.
      const int vec_enc = vector_length_encoding(this);
      __ vshiftd_imm(ideal_opc, result, input, $shift$$constant, vec_enc);
    } else {
      // Two-operand form: copy the source with a move sized to the vector,
      // then shift the copy in place.
      const int lanes = Matcher::vector_length(this);
      switch (lanes) {
        case 2:
          __ movdbl(result, input);
          break;
        default:
          assert(lanes == 4, "sanity");
          __ movdqu(result, input);
          break;
      }
      __ vshiftd_imm(ideal_opc, result, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
20931
// Longs vector left and logical right shift by a uniform (non-variable)
// count held in a vector register. Arithmetic right shift (RShiftVL) is not
// matched here.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form.
      const int vec_enc = vector_length_encoding(this);
      __ vshiftq(ideal_opc, result, input, count, vec_enc);
    } else {
      // Two-operand form: only the two-lane vector is expected here.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, input);
      __ vshiftq(ideal_opc, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
20952
// Longs vector left and logical right shift by an 8-bit immediate count
// wrapped in LShiftCntV / RShiftCntV.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form.
      const int vec_enc = vector_length_encoding(this);
      __ vshiftq_imm(ideal_opc, result, input, $shift$$constant, vec_enc);
    } else {
      // Two-operand form: only the two-lane vector is expected here.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, input);
      __ vshiftq_imm(ideal_opc, result, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
20971
20972 // -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift by a uniform count for UseAVX <= 2.
// Emulated as: r = src >>> s; m = vector_long_sign_mask() >>> s;
// result = (r ^ m) - m.
// NOTE(review): the mask constant is presumably the per-lane sign bit, which
// makes the xor/subtract pair a sign extension; confirm against the stub.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;
    const XMMRegister sign = $tmp$$XMMRegister;
    if (lanes != 2) {
      // Four lanes: 256-bit three-operand forms.
      assert(lanes == 4, "sanity");
      assert(UseAVX > 1, "required");
      const int wide_enc = Assembler::AVX_256bit;
      __ vpsrlq(result, input, count, wide_enc);
      __ vmovdqu(sign, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq(sign, sign, count, wide_enc);
      __ vpxor(result, result, sign, wide_enc);
      __ vpsubq(result, result, sign, wide_enc);
    } else {
      // Two lanes: two-operand forms on a copy of the source.
      __ movdqu(result, input);
      __ psrlq(result, count);
      __ movdqu(sign, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq(sign, count);
      __ pxor(result, sign);
      __ psubq(result, sign);
    }
  %}
  ins_pipe( pipe_slow );
%}
21001
// Long vector arithmetic right shift by a uniform count for UseAVX > 2:
// a single evpsraq, no temporaries.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister count = $shift$$XMMRegister;
    __ evpsraq(result, input, count, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21012
21013 // ------------------- Variable Shift -----------------------------
// Byte variable (per-lane count) shift, up to 8 lanes, without AVX512BW.
// varshiftbw leaves a word-sized result in dst, which is then packed down
// to bytes.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_opc = this->ideal_Opcode();
    const int enc128 = Assembler::AVX_128bit;
    const XMMRegister result = $dst$$XMMRegister;
    __ varshiftbw(ideal_opc, result, $src$$XMMRegister, $shift$$XMMRegister, enc128, $vtmp$$XMMRegister);
    // Down-convert the word result to bytes.
    __ vpackuswb(result, result, result, 0);
  %}
  ins_pipe( pipe_slow );
%}
21034
// 16-lane byte variable shift without AVX512BW.
// The lower and upper eight bytes are shifted separately as words and the two
// word results are then packed into dst.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_opc = this->ideal_Opcode();
    const int enc128 = Assembler::AVX_128bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    const XMMRegister t1 = $vtmp1$$XMMRegister;
    const XMMRegister t2 = $vtmp2$$XMMRegister;

    // Lower half: word result lands in dst.
    __ varshiftbw(ideal_opc, result, input, counts, enc128, t1);

    // Upper half: move the high 64 bits of data and counts down, word result in t1.
    __ vpshufd(t1, input, 0xE, 0);
    __ vpshufd(t2, counts, 0xE, 0);
    __ varshiftbw(ideal_opc, t1, t1, t2, enc128, t2);

    // Merge and down-convert the two word results to bytes in dst.
    __ vpackuswb(result, result, t1, 0);
  %}
  ins_pipe( pipe_slow );
%}
21062
// 32-lane byte variable shift without AVX512BW.
// Each 128-bit half of the input is processed like the 16-lane rule (two
// 8-byte word shifts followed by a pack); the two 128-bit results are then
// combined with vinserti128. Uses four vector temporaries.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21098
// Byte variable shift for up to 32 lanes when AVX512BW is available:
// a single evarshiftb call with one vector temporary.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister scratch = $vtmp$$XMMRegister;
    __ evarshiftb(ideal_opc, result, $src$$XMMRegister, $shift$$XMMRegister, vec_enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
21117
// 64-lane byte variable shift when AVX512BW is available.
// The lower and upper 256-bit halves are shifted separately with evarshiftb
// and recombined with vinserti64x4.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_opc = this->ideal_Opcode();
    const int half_enc = Assembler::AVX_256bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    const XMMRegister t1 = $vtmp1$$XMMRegister;
    const XMMRegister t2 = $vtmp2$$XMMRegister;

    // Lower 256 bits, result in dst.
    __ evarshiftb(ideal_opc, result, input, counts, half_enc, t1);
    // Upper 256 bits of data and counts, result in t1.
    __ vextracti64x4_high(t1, input);
    __ vextracti64x4_high(t2, counts);
    __ evarshiftb(ideal_opc, t1, t1, t2, half_enc, t2);
    // Place the upper result in the high half of dst.
    __ vinserti64x4(result, result, t1, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21140
21141 // Short variable shift
21142 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21143 predicate(Matcher::vector_length(n) <= 8 &&
21144 n->as_ShiftV()->is_var_shift() &&
21145 !VM_Version::supports_avx512bw());
21146 match(Set dst ( LShiftVS src shift));
21147 match(Set dst ( RShiftVS src shift));
21148 match(Set dst (URShiftVS src shift));
21149 effect(TEMP dst, TEMP vtmp);
21150 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21151 ins_encode %{
21152 assert(UseAVX >= 2, "required");
21153
21154 int opcode = this->ideal_Opcode();
21155 bool sign = (opcode != Op_URShiftVS);
21156 int vlen_enc = Assembler::AVX_256bit;
21157 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21158 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21159 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21160 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21161 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21162 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21163 %}
21164 ins_pipe( pipe_slow );
21165 %}
21166
21167 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21168 predicate(Matcher::vector_length(n) == 16 &&
21169 n->as_ShiftV()->is_var_shift() &&
21170 !VM_Version::supports_avx512bw());
21171 match(Set dst ( LShiftVS src shift));
21172 match(Set dst ( RShiftVS src shift));
21173 match(Set dst (URShiftVS src shift));
21174 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21175 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21176 ins_encode %{
21177 assert(UseAVX >= 2, "required");
21178
21179 int opcode = this->ideal_Opcode();
21180 bool sign = (opcode != Op_URShiftVS);
21181 int vlen_enc = Assembler::AVX_256bit;
21182 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21183 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21184 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21185 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21186 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21187
21188 // Shift upper half, with result in dst using vtmp1 as TEMP
21189 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21190 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21191 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21192 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21193 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21194 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21195
21196 // Merge lower and upper half result into dst
21197 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21198 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21199 %}
21200 ins_pipe( pipe_slow );
21201 %}
21202
21203 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21204 predicate(n->as_ShiftV()->is_var_shift() &&
21205 VM_Version::supports_avx512bw());
21206 match(Set dst ( LShiftVS src shift));
21207 match(Set dst ( RShiftVS src shift));
21208 match(Set dst (URShiftVS src shift));
21209 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21210 ins_encode %{
21211 assert(UseAVX > 2, "required");
21212
21213 int opcode = this->ideal_Opcode();
21214 int vlen_enc = vector_length_encoding(this);
21215 if (!VM_Version::supports_avx512vl()) {
21216 vlen_enc = Assembler::AVX_512bit;
21217 }
21218 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21219 %}
21220 ins_pipe( pipe_slow );
21221 %}
21222
21223 //Integer variable shift
21224 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21225 predicate(n->as_ShiftV()->is_var_shift());
21226 match(Set dst ( LShiftVI src shift));
21227 match(Set dst ( RShiftVI src shift));
21228 match(Set dst (URShiftVI src shift));
21229 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21230 ins_encode %{
21231 assert(UseAVX >= 2, "required");
21232
21233 int opcode = this->ideal_Opcode();
21234 int vlen_enc = vector_length_encoding(this);
21235 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21236 %}
21237 ins_pipe( pipe_slow );
21238 %}
21239
21240 //Long variable shift
21241 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21242 predicate(n->as_ShiftV()->is_var_shift());
21243 match(Set dst ( LShiftVL src shift));
21244 match(Set dst (URShiftVL src shift));
21245 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21246 ins_encode %{
21247 assert(UseAVX >= 2, "required");
21248
21249 int opcode = this->ideal_Opcode();
21250 int vlen_enc = vector_length_encoding(this);
21251 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21252 %}
21253 ins_pipe( pipe_slow );
21254 %}
21255
21256 //Long variable right shift arithmetic
21257 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21258 predicate(Matcher::vector_length(n) <= 4 &&
21259 n->as_ShiftV()->is_var_shift() &&
21260 UseAVX == 2);
21261 match(Set dst (RShiftVL src shift));
21262 effect(TEMP dst, TEMP vtmp);
21263 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21264 ins_encode %{
21265 int opcode = this->ideal_Opcode();
21266 int vlen_enc = vector_length_encoding(this);
21267 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21268 $vtmp$$XMMRegister);
21269 %}
21270 ins_pipe( pipe_slow );
21271 %}
21272
21273 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21274 predicate(n->as_ShiftV()->is_var_shift() &&
21275 UseAVX > 2);
21276 match(Set dst (RShiftVL src shift));
21277 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21278 ins_encode %{
21279 int opcode = this->ideal_Opcode();
21280 int vlen_enc = vector_length_encoding(this);
21281 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21282 %}
21283 ins_pipe( pipe_slow );
21284 %}
21285
21286 // --------------------------------- AND --------------------------------------
21287
21288 instruct vand(vec dst, vec src) %{
21289 predicate(UseAVX == 0);
21290 match(Set dst (AndV dst src));
21291 format %{ "pand $dst,$src\t! and vectors" %}
21292 ins_encode %{
21293 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21294 %}
21295 ins_pipe( pipe_slow );
21296 %}
21297
21298 instruct vand_reg(vec dst, vec src1, vec src2) %{
21299 predicate(UseAVX > 0);
21300 match(Set dst (AndV src1 src2));
21301 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21302 ins_encode %{
21303 int vlen_enc = vector_length_encoding(this);
21304 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21305 %}
21306 ins_pipe( pipe_slow );
21307 %}
21308
21309 instruct vand_mem(vec dst, vec src, memory mem) %{
21310 predicate((UseAVX > 0) &&
21311 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21312 match(Set dst (AndV src (LoadVector mem)));
21313 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21314 ins_encode %{
21315 int vlen_enc = vector_length_encoding(this);
21316 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21317 %}
21318 ins_pipe( pipe_slow );
21319 %}
21320
21321 // --------------------------------- OR ---------------------------------------
21322
21323 instruct vor(vec dst, vec src) %{
21324 predicate(UseAVX == 0);
21325 match(Set dst (OrV dst src));
21326 format %{ "por $dst,$src\t! or vectors" %}
21327 ins_encode %{
21328 __ por($dst$$XMMRegister, $src$$XMMRegister);
21329 %}
21330 ins_pipe( pipe_slow );
21331 %}
21332
21333 instruct vor_reg(vec dst, vec src1, vec src2) %{
21334 predicate(UseAVX > 0);
21335 match(Set dst (OrV src1 src2));
21336 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21337 ins_encode %{
21338 int vlen_enc = vector_length_encoding(this);
21339 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21340 %}
21341 ins_pipe( pipe_slow );
21342 %}
21343
21344 instruct vor_mem(vec dst, vec src, memory mem) %{
21345 predicate((UseAVX > 0) &&
21346 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21347 match(Set dst (OrV src (LoadVector mem)));
21348 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21349 ins_encode %{
21350 int vlen_enc = vector_length_encoding(this);
21351 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21352 %}
21353 ins_pipe( pipe_slow );
21354 %}
21355
21356 // --------------------------------- XOR --------------------------------------
21357
21358 instruct vxor(vec dst, vec src) %{
21359 predicate(UseAVX == 0);
21360 match(Set dst (XorV dst src));
21361 format %{ "pxor $dst,$src\t! xor vectors" %}
21362 ins_encode %{
21363 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21364 %}
21365 ins_pipe( pipe_slow );
21366 %}
21367
21368 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21369 predicate(UseAVX > 0);
21370 match(Set dst (XorV src1 src2));
21371 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21372 ins_encode %{
21373 int vlen_enc = vector_length_encoding(this);
21374 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21375 %}
21376 ins_pipe( pipe_slow );
21377 %}
21378
21379 instruct vxor_mem(vec dst, vec src, memory mem) %{
21380 predicate((UseAVX > 0) &&
21381 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21382 match(Set dst (XorV src (LoadVector mem)));
21383 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21384 ins_encode %{
21385 int vlen_enc = vector_length_encoding(this);
21386 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21387 %}
21388 ins_pipe( pipe_slow );
21389 %}
21390
21391 // --------------------------------- VectorCast --------------------------------------
21392
21393 instruct vcastBtoX(vec dst, vec src) %{
21394 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21395 match(Set dst (VectorCastB2X src));
21396 format %{ "vector_cast_b2x $dst,$src\t!" %}
21397 ins_encode %{
21398 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21399 int vlen_enc = vector_length_encoding(this);
21400 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21401 %}
21402 ins_pipe( pipe_slow );
21403 %}
21404
21405 instruct vcastBtoD(legVec dst, legVec src) %{
21406 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21407 match(Set dst (VectorCastB2X src));
21408 format %{ "vector_cast_b2x $dst,$src\t!" %}
21409 ins_encode %{
21410 int vlen_enc = vector_length_encoding(this);
21411 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21412 %}
21413 ins_pipe( pipe_slow );
21414 %}
21415
21416 instruct castStoX(vec dst, vec src) %{
21417 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21418 Matcher::vector_length(n->in(1)) <= 8 && // src
21419 Matcher::vector_element_basic_type(n) == T_BYTE);
21420 match(Set dst (VectorCastS2X src));
21421 format %{ "vector_cast_s2x $dst,$src" %}
21422 ins_encode %{
21423 assert(UseAVX > 0, "required");
21424
21425 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21426 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21427 %}
21428 ins_pipe( pipe_slow );
21429 %}
21430
21431 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21432 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21433 Matcher::vector_length(n->in(1)) == 16 && // src
21434 Matcher::vector_element_basic_type(n) == T_BYTE);
21435 effect(TEMP dst, TEMP vtmp);
21436 match(Set dst (VectorCastS2X src));
21437 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21438 ins_encode %{
21439 assert(UseAVX > 0, "required");
21440
21441 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21442 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21443 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21444 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21445 %}
21446 ins_pipe( pipe_slow );
21447 %}
21448
21449 instruct vcastStoX_evex(vec dst, vec src) %{
21450 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21451 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21452 match(Set dst (VectorCastS2X src));
21453 format %{ "vector_cast_s2x $dst,$src\t!" %}
21454 ins_encode %{
21455 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21456 int src_vlen_enc = vector_length_encoding(this, $src);
21457 int vlen_enc = vector_length_encoding(this);
21458 switch (to_elem_bt) {
21459 case T_BYTE:
21460 if (!VM_Version::supports_avx512vl()) {
21461 vlen_enc = Assembler::AVX_512bit;
21462 }
21463 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21464 break;
21465 case T_INT:
21466 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21467 break;
21468 case T_FLOAT:
21469 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21470 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21471 break;
21472 case T_LONG:
21473 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21474 break;
21475 case T_DOUBLE: {
21476 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21477 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21478 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21479 break;
21480 }
21481 default:
21482 ShouldNotReachHere();
21483 }
21484 %}
21485 ins_pipe( pipe_slow );
21486 %}
21487
21488 instruct castItoX(vec dst, vec src) %{
21489 predicate(UseAVX <= 2 &&
21490 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21491 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21492 match(Set dst (VectorCastI2X src));
21493 format %{ "vector_cast_i2x $dst,$src" %}
21494 ins_encode %{
21495 assert(UseAVX > 0, "required");
21496
21497 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21498 int vlen_enc = vector_length_encoding(this, $src);
21499
21500 if (to_elem_bt == T_BYTE) {
21501 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21502 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21503 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21504 } else {
21505 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21506 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21507 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21508 }
21509 %}
21510 ins_pipe( pipe_slow );
21511 %}
21512
21513 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21514 predicate(UseAVX <= 2 &&
21515 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21516 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21517 match(Set dst (VectorCastI2X src));
21518 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21519 effect(TEMP dst, TEMP vtmp);
21520 ins_encode %{
21521 assert(UseAVX > 0, "required");
21522
21523 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21524 int vlen_enc = vector_length_encoding(this, $src);
21525
21526 if (to_elem_bt == T_BYTE) {
21527 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21528 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21529 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21530 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21531 } else {
21532 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21533 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21534 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21535 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21536 }
21537 %}
21538 ins_pipe( pipe_slow );
21539 %}
21540
21541 instruct vcastItoX_evex(vec dst, vec src) %{
21542 predicate(UseAVX > 2 ||
21543 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21544 match(Set dst (VectorCastI2X src));
21545 format %{ "vector_cast_i2x $dst,$src\t!" %}
21546 ins_encode %{
21547 assert(UseAVX > 0, "required");
21548
21549 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21550 int src_vlen_enc = vector_length_encoding(this, $src);
21551 int dst_vlen_enc = vector_length_encoding(this);
21552 switch (dst_elem_bt) {
21553 case T_BYTE:
21554 if (!VM_Version::supports_avx512vl()) {
21555 src_vlen_enc = Assembler::AVX_512bit;
21556 }
21557 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21558 break;
21559 case T_SHORT:
21560 if (!VM_Version::supports_avx512vl()) {
21561 src_vlen_enc = Assembler::AVX_512bit;
21562 }
21563 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21564 break;
21565 case T_FLOAT:
21566 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21567 break;
21568 case T_LONG:
21569 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21570 break;
21571 case T_DOUBLE:
21572 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21573 break;
21574 default:
21575 ShouldNotReachHere();
21576 }
21577 %}
21578 ins_pipe( pipe_slow );
21579 %}
21580
21581 instruct vcastLtoBS(vec dst, vec src) %{
21582 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21583 UseAVX <= 2);
21584 match(Set dst (VectorCastL2X src));
21585 format %{ "vector_cast_l2x $dst,$src" %}
21586 ins_encode %{
21587 assert(UseAVX > 0, "required");
21588
21589 int vlen = Matcher::vector_length_in_bytes(this, $src);
21590 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21591 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21592 : ExternalAddress(vector_int_to_short_mask());
21593 if (vlen <= 16) {
21594 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21595 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21596 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21597 } else {
21598 assert(vlen <= 32, "required");
21599 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21600 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21601 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21602 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21603 }
21604 if (to_elem_bt == T_BYTE) {
21605 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21606 }
21607 %}
21608 ins_pipe( pipe_slow );
21609 %}
21610
21611 instruct vcastLtoX_evex(vec dst, vec src) %{
21612 predicate(UseAVX > 2 ||
21613 (Matcher::vector_element_basic_type(n) == T_INT ||
21614 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21615 Matcher::vector_element_basic_type(n) == T_DOUBLE));
21616 match(Set dst (VectorCastL2X src));
21617 format %{ "vector_cast_l2x $dst,$src\t!" %}
21618 ins_encode %{
21619 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21620 int vlen = Matcher::vector_length_in_bytes(this, $src);
21621 int vlen_enc = vector_length_encoding(this, $src);
21622 switch (to_elem_bt) {
21623 case T_BYTE:
21624 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21625 vlen_enc = Assembler::AVX_512bit;
21626 }
21627 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21628 break;
21629 case T_SHORT:
21630 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21631 vlen_enc = Assembler::AVX_512bit;
21632 }
21633 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21634 break;
21635 case T_INT:
21636 if (vlen == 8) {
21637 if ($dst$$XMMRegister != $src$$XMMRegister) {
21638 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21639 }
21640 } else if (vlen == 16) {
21641 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21642 } else if (vlen == 32) {
21643 if (UseAVX > 2) {
21644 if (!VM_Version::supports_avx512vl()) {
21645 vlen_enc = Assembler::AVX_512bit;
21646 }
21647 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21648 } else {
21649 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21650 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21651 }
21652 } else { // vlen == 64
21653 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21654 }
21655 break;
21656 case T_FLOAT:
21657 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21658 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21659 break;
21660 case T_DOUBLE:
21661 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21662 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21663 break;
21664
21665 default: assert(false, "%s", type2name(to_elem_bt));
21666 }
21667 %}
21668 ins_pipe( pipe_slow );
21669 %}
21670
21671 instruct vcastFtoD_reg(vec dst, vec src) %{
21672 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21673 match(Set dst (VectorCastF2X src));
21674 format %{ "vector_cast_f2d $dst,$src\t!" %}
21675 ins_encode %{
21676 int vlen_enc = vector_length_encoding(this);
21677 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21678 %}
21679 ins_pipe( pipe_slow );
21680 %}
21681
21682
21683 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21684 predicate(!VM_Version::supports_avx10_2() &&
21685 !VM_Version::supports_avx512vl() &&
21686 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21687 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21688 is_integral_type(Matcher::vector_element_basic_type(n)));
21689 match(Set dst (VectorCastF2X src));
21690 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21691 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
21692 ins_encode %{
21693 int vlen_enc = vector_length_encoding(this, $src);
21694 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21695 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
21696 // 32 bit addresses for register indirect addressing mode since stub constants
21697 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
21698 // However, targets are free to increase this limit, but having a large code cache size
21699 // greater than 2G looks unreasonable in practical scenario, on the hind side with given
21700 // cap we save a temporary register allocation which in limiting case can prevent
21701 // spilling in high register pressure blocks.
21702 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21703 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
21704 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21705 %}
21706 ins_pipe( pipe_slow );
21707 %}
21708
21709 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21710 predicate(!VM_Version::supports_avx10_2() &&
21711 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21712 is_integral_type(Matcher::vector_element_basic_type(n)));
21713 match(Set dst (VectorCastF2X src));
21714 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21715 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
21716 ins_encode %{
21717 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21718 if (to_elem_bt == T_LONG) {
21719 int vlen_enc = vector_length_encoding(this);
21720 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21721 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
21722 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
21723 } else {
21724 int vlen_enc = vector_length_encoding(this, $src);
21725 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21726 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
21727 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21728 }
21729 %}
21730 ins_pipe( pipe_slow );
21731 %}
21732
21733 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
21734 predicate(VM_Version::supports_avx10_2() &&
21735 is_integral_type(Matcher::vector_element_basic_type(n)));
21736 match(Set dst (VectorCastF2X src));
21737 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
21738 ins_encode %{
21739 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21740 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
21741 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21742 %}
21743 ins_pipe( pipe_slow );
21744 %}
21745
21746 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
21747 predicate(VM_Version::supports_avx10_2() &&
21748 is_integral_type(Matcher::vector_element_basic_type(n)));
21749 match(Set dst (VectorCastF2X (LoadVector src)));
21750 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
21751 ins_encode %{
21752 int vlen = Matcher::vector_length(this);
21753 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21754 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
21755 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
21756 %}
21757 ins_pipe( pipe_slow );
21758 %}
21759
21760 instruct vcastDtoF_reg(vec dst, vec src) %{
21761 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
21762 match(Set dst (VectorCastD2X src));
21763 format %{ "vector_cast_d2x $dst,$src\t!" %}
21764 ins_encode %{
21765 int vlen_enc = vector_length_encoding(this, $src);
21766 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21767 %}
21768 ins_pipe( pipe_slow );
21769 %}
21770
21771 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
21772 predicate(!VM_Version::supports_avx10_2() &&
21773 !VM_Version::supports_avx512vl() &&
21774 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21775 is_integral_type(Matcher::vector_element_basic_type(n)));
21776 match(Set dst (VectorCastD2X src));
21777 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
21778 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
21779 ins_encode %{
21780 int vlen_enc = vector_length_encoding(this, $src);
21781 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21782 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21783 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
21784 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21785 %}
21786 ins_pipe( pipe_slow );
21787 %}
21788
21789 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21790 predicate(!VM_Version::supports_avx10_2() &&
21791 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21792 is_integral_type(Matcher::vector_element_basic_type(n)));
21793 match(Set dst (VectorCastD2X src));
21794 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21795 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
21796 ins_encode %{
21797 int vlen_enc = vector_length_encoding(this, $src);
21798 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21799 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
21800 ExternalAddress(vector_float_signflip());
21801 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21802 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
21803 %}
21804 ins_pipe( pipe_slow );
21805 %}
21806
21807 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
21808 predicate(VM_Version::supports_avx10_2() &&
21809 is_integral_type(Matcher::vector_element_basic_type(n)));
21810 match(Set dst (VectorCastD2X src));
21811 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
21812 ins_encode %{
21813 int vlen_enc = vector_length_encoding(this, $src);
21814 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21815 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21816 %}
21817 ins_pipe( pipe_slow );
21818 %}
21819
21820 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
21821 predicate(VM_Version::supports_avx10_2() &&
21822 is_integral_type(Matcher::vector_element_basic_type(n)));
21823 match(Set dst (VectorCastD2X (LoadVector src)));
21824 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
21825 ins_encode %{
21826 int vlen = Matcher::vector_length(this);
21827 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
21828 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21829 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
21830 %}
21831 ins_pipe( pipe_slow );
21832 %}
21833
21834 instruct vucast(vec dst, vec src) %{
21835 match(Set dst (VectorUCastB2X src));
21836 match(Set dst (VectorUCastS2X src));
21837 match(Set dst (VectorUCastI2X src));
21838 format %{ "vector_ucast $dst,$src\t!" %}
21839 ins_encode %{
21840 assert(UseAVX > 0, "required");
21841
21842 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
21843 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21844 int vlen_enc = vector_length_encoding(this);
21845 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
21846 %}
21847 ins_pipe( pipe_slow );
21848 %}
21849
21850 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21851 predicate(!VM_Version::supports_avx512vl() &&
21852 Matcher::vector_length_in_bytes(n) < 64 &&
21853 Matcher::vector_element_basic_type(n) == T_INT);
21854 match(Set dst (RoundVF src));
21855 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21856 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
21857 ins_encode %{
21858 int vlen_enc = vector_length_encoding(this);
21859 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21860 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
21861 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
21862 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
21863 %}
21864 ins_pipe( pipe_slow );
21865 %}
21866
// RoundVF with int result lanes when AVX512VL is available or the vector is
// 64 bytes wide. Uses two vector temps and two opmask temps; flags are killed.
instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // Constant-table entry with the MXCSR value handed to the rounding helper;
    // the value chosen depends on EnableX86ECoreOpts.
    InternalAddress mxcsr_cfg = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()),
                               mxcsr_cfg, enc, $tmp$$Register,
                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                               $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
21883
// RoundVD with long result lanes. Uses the double sign-flip stub constant,
// two vector temps and two opmask temps; flags are killed.
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // Constant-table entry with the MXCSR value handed to the rounding helper;
    // the value chosen depends on EnableX86ECoreOpts.
    InternalAddress mxcsr_cfg = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()),
                                mxcsr_cfg, enc, $tmp$$Register,
                                $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
21898
21899 // --------------------------------- VectorMaskCmp --------------------------------------
21900
// Float/double VectorMaskCmp on 8..32 byte inputs whose result is a plain
// vector (the node type is not a pvectmask). Emits vcmpps or vcmppd.
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    const bool single_precision = Matcher::vector_element_basic_type(this, $src1) == T_FLOAT;
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    if (single_precision) {
      __ vcmpps(result, lhs, rhs, fp_pred, enc);
    } else {
      __ vcmppd(result, lhs, rhs, fp_pred, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21919
// Float/double VectorMaskCmp on 64-byte inputs whose result is a plain vector.
// The compare writes an opmask temp, which then drives a masked load of the
// all-bits-set constant into dst (merge flag is false).
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_pvectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    const int enc = Assembler::AVX_512bit;
    const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister no_mask = k0; // The comparison itself is not being masked.
    KRegister cmp_bits = $ktmp$$KRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps(cmp_bits, no_mask, lhs, rhs, fp_pred, enc);
      __ evmovdqul($dst$$XMMRegister, cmp_bits, ExternalAddress(vector_all_bits_set()), false, enc, noreg);
    } else {
      __ evcmppd(cmp_bits, no_mask, lhs, rhs, fp_pred, enc);
      __ evmovdquq($dst$$XMMRegister, cmp_bits, ExternalAddress(vector_all_bits_set()), false, enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
21941
// Float/double VectorMaskCmp whose result is a pvectmask: the compare writes
// straight into the destination opmask register.
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const int enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister no_mask = k0; // The comparison itself is not being masked.
    KRegister result = $dst$$KRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps(result, no_mask, lhs, rhs, fp_pred, enc);
    } else {
      __ evcmppd(result, no_mask, lhs, rhs, fp_pred, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21960
// Signed integral VectorMaskCmp (eq, lt, gt) on 4..32 byte inputs with a
// plain vector result. No scratch vector is passed to vpcmpCCW (xnoreg).
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    const Assembler::Width lane_width = widthForType(Matcher::vector_element_basic_type(this, $src1));
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpcmpCCW($dst$$XMMRegister, lhs, rhs, xnoreg, pred, lane_width, enc);
  %}
  ins_pipe( pipe_slow );
%}
21980
// Signed integral VectorMaskCmp (ne, le, ge) on 4..32 byte inputs with a
// plain vector result. Unlike the eq/lt/gt rule, a scratch vector is handed
// to vpcmpCCW, and dst is a TEMP so it does not share a register with an input.
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    const Assembler::Width lane_width = widthForType(Matcher::vector_element_basic_type(this, $src1));
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ vpcmpCCW($dst$$XMMRegister, lhs, rhs, scratch, pred, lane_width, enc);
  %}
  ins_pipe( pipe_slow );
%}
22001
// Unsigned integral VectorMaskCmp on 4..32 byte inputs with a plain vector
// result. Both operands are XORed with a broadcast constant from
// high_bit_set() (presumably the per-lane sign bit) and then compared with
// vpcmpCCW.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_const = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    const int enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    const Assembler::Width lane_width = widthForType(Matcher::vector_element_basic_type(this, $src1));
    XMMRegister flipped_lhs = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;

    // Replicate the constant across the scratch vector.
    if (enc != Assembler::AVX_128bit) {
      __ vbroadcastsd(scratch, flip_const, enc, noreg);
    } else {
      __ vmovddup(scratch, flip_const, enc, noreg);
    }
    // Flip both operands with the same constant; scratch ends up holding the
    // flipped right-hand side and is also passed as the compare temp.
    __ vpxor(flipped_lhs, scratch, $src1$$XMMRegister, enc);
    __ vpxor(scratch, scratch, $src2$$XMMRegister, enc);
    __ vpcmpCCW(flipped_lhs, flipped_lhs, scratch, scratch, pred, lane_width, enc);
  %}
  ins_pipe( pipe_slow );
%}
22028
// Integral VectorMaskCmp on 64-byte inputs with a plain vector result.
// Only int and long lanes are handled; any other lane type trips the assert.
// The compare writes an opmask temp which then selects lanes of the
// all-bits-set constant loaded into dst (no merging).
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    const bool signed_cmp = !Matcher::is_unsigned_booltest_pred($cond$$constant);
    const bool merge_lanes = false;
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $src1);
    KRegister no_mask = k0; // The comparison itself is not being masked.
    KRegister cmp_bits = $ktmp$$KRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;

    if (lane_bt == T_INT) {
      __ evpcmpd(cmp_bits, no_mask, lhs, rhs, pred, signed_cmp, enc);
      __ evmovdqul($dst$$XMMRegister, cmp_bits, ExternalAddress(vector_all_bits_set()), merge_lanes, enc, noreg);
    } else if (lane_bt == T_LONG) {
      __ evpcmpq(cmp_bits, no_mask, lhs, rhs, pred, signed_cmp, enc);
      __ evmovdquq($dst$$XMMRegister, cmp_bits, ExternalAddress(vector_all_bits_set()), merge_lanes, enc, noreg);
    } else {
      assert(false, "%s", type2name(lane_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22062
22063
// Integral VectorMaskCmp whose result is a pvectmask: the compare writes
// straight into the destination opmask register. The instruction is chosen
// by the lane type of src1; signedness follows the BoolTest predicate.
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");

    const int enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    const bool signed_cmp = !Matcher::is_unsigned_booltest_pred($cond$$constant);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $src1);
    KRegister result = $dst$$KRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;

    // The comparison is not masked (k0); dispatch on the lane width.
    switch (lane_bt) {
      case T_BYTE:
        __ evpcmpb(result, k0, lhs, rhs, pred, signed_cmp, enc);
        break;
      case T_SHORT:
        __ evpcmpw(result, k0, lhs, rhs, pred, signed_cmp, enc);
        break;
      case T_INT:
        __ evpcmpd(result, k0, lhs, rhs, pred, signed_cmp, enc);
        break;
      case T_LONG:
        __ evpcmpq(result, k0, lhs, rhs, pred, signed_cmp, enc);
        break;
      default:
        assert(false, "%s", type2name(lane_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22101
22102 // Extract
22103
// Extract one byte, short or int lane from a vector of at most 16 bytes
// into a general purpose register.
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    // The lane type of the input selects the extraction emitted by get_elem.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(lane_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22118
// Extract one byte, short or int lane from a 32- or 64-byte vector into a
// general purpose register. get_lane() returns the register holding the
// 128-bit lane that contains the element (it may use vtmp for that), and
// get_elem() then pulls the element out of that lane.
//
// The index operand is an immU8, like every other Extract rule in this
// file: a lane index is never negative, and the assert below only checks
// the upper bound.
instruct vextractI(rRegI dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22136
// Extract one long lane from a vector of at most two elements into a
// general purpose register.
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister from = $src$$XMMRegister;
    __ get_elem(T_LONG, $dst$$Register, from, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22149
// Extract one long lane from a vector of four or eight elements into a
// general purpose register: pick the containing lane with get_lane(), then
// read the element with get_elem().
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8); // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister holder = __ get_lane(T_LONG, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, holder, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22164
// Extract one float lane from a vector of at most four elements into a
// float register; get_elem() is given a vector temp for this form.
instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, scratch);
  %}
  ins_pipe( pipe_slow );
%}
22177
// Extract one float lane from a vector of eight or sixteen elements into a
// float register: pick the containing lane with get_lane(), then read the
// element with get_elem().
instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister holder = __ get_lane(T_FLOAT, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, holder, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22192
// Extract one double lane from a two-element vector into a double register.
instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister from = $src$$XMMRegister;
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, from, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22204
// Extract one double lane from a vector of four or eight elements into a
// double register: pick the containing lane with get_lane(), then read the
// element with get_elem().
instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8); // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister holder = __ get_lane(T_DOUBLE, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, holder, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22219
22220 // --------------------------------- Vector Blend --------------------------------------
22221
// SSE-only (UseAVX == 0) VectorBlend. pblendvb takes its mask implicitly
// from xmm0, so the mask is first copied into the rxmm0 temp unless it
// already lives there. dst is both an input and the result.
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP tmp);
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    XMMRegister mask_reg = $mask$$XMMRegister;
    XMMRegister implicit_mask = $tmp$$XMMRegister;
    if (mask_reg != implicit_mask) {
      __ movdqu(implicit_mask, mask_reg);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
22237
// AVX VectorBlend for integral lanes, vectors up to 32 bytes, with a vector
// (non-pvectmask) mask, when EnableX86ECoreOpts is off. Emits vpblendvb.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister selector = $mask$$XMMRegister;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, selector, enc);
  %}
  ins_pipe( pipe_slow );
%}
22251
// AVX VectorBlend for non-integral lanes, vectors up to 32 bytes, with a
// vector (non-pvectmask) mask, when EnableX86ECoreOpts is off. Emits vblendvps.
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister selector = $mask$$XMMRegister;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, selector, enc);
  %}
  ins_pipe( pipe_slow );
%}
22265
// AVX VectorBlend for vectors up to 32 bytes with a vector mask when
// EnableX86ECoreOpts is on. The blend is built from bitwise operations:
//   dst = (src1 & ~mask) | (src2 & mask)
// dst is a TEMP because it is written before the last read of the inputs.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP vtmp, TEMP dst);
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister selector = $mask$$XMMRegister;
    XMMRegister kept = $vtmp$$XMMRegister;
    XMMRegister result = $dst$$XMMRegister;
    // Lanes of src1 where the mask is clear.
    __ vpandn(kept, selector, $src1$$XMMRegister, enc);
    // Lanes of src2 where the mask is set.
    __ vpand(result, selector, $src2$$XMMRegister, enc);
    __ vpor(result, result, kept, enc);
  %}
  ins_pipe( pipe_slow );
%}
22281
// VectorBlend for 64-byte vectors with a vector (non-pvectmask) mask.
// The vector mask is first turned into an opmask by comparing it for
// equality against the all-bits-set constant; that opmask then drives
// evpblend with merging enabled.
//
// The format string names the allocator-chosen opmask temp ($ktmp) rather
// than a fixed register, since the temp is not pinned to k2.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22296
22297
// VectorBlend whose mask is already a pvectmask (opmask register).
// Sub-word lane types are only accepted when AVX512BW is available.
// The mask register drives evpblend directly with merging enabled, so no
// temporary is needed; the format string therefore lists none.
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22311
22312 // --------------------------------- ABS --------------------------------------
22313 // a = |a|
// Packed byte absolute value. Vectors of up to 16 lanes use the legacy
// pabsb form; longer vectors use vpabsb with an explicit length encoding.
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    if (lanes > 16) {
      const int enc = vector_length_encoding(this);
      __ vpabsb(result, input, enc);
    } else {
      __ pabsb(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22328
// Packed short absolute value. Vectors of up to 8 lanes use the legacy
// pabsw form; longer vectors use vpabsw with an explicit length encoding.
instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    if (lanes > 8) {
      const int enc = vector_length_encoding(this);
      __ vpabsw(result, input, enc);
    } else {
      __ pabsw(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22343
// Packed int absolute value. Vectors of up to 4 lanes use the legacy
// pabsd form; longer vectors use vpabsd with an explicit length encoding.
instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    if (lanes > 4) {
      const int enc = vector_length_encoding(this);
      __ vpabsd(result, input, enc);
    } else {
      __ pabsd(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22358
// Packed long absolute value via evpabsq. Without AVX512VL the instruction
// is always emitted with the 512-bit length encoding, whatever the
// vector length of the node.
instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int natural_enc = vector_length_encoding(this);
    const int enc = VM_Version::supports_avx512vl() ? natural_enc : (int)Assembler::AVX_512bit;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22372
22373 // --------------------------------- ABSNEG --------------------------------------
22374
// Packed float abs/neg for every vector length except 4 (the 4-lane case
// has its own one-operand rule). The ideal opcode tells the macro assembler
// whether to emit abs or neg. Two-lane vectors use the form without a
// length encoding; 8 and 16 lanes pass one explicitly.
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    if (lanes != 2) {
      assert(lanes == 8 || lanes == 16, "required");
      const int enc = vector_length_encoding(this);
      __ vabsnegf(ideal_op, result, input, enc);
    } else {
      __ vabsnegf(ideal_op, result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22394
// Packed float abs/neg for exactly four lanes, operating in place: dst is
// both the input and the result. The ideal opcode selects abs or neg.
instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    XMMRegister in_out = $dst$$XMMRegister;
    __ vabsnegf(ideal_op, in_out, in_out);
  %}
  ins_pipe( pipe_slow );
%}
22407
// Packed double abs/neg. The ideal opcode selects abs or neg. Two-lane
// vectors use the form without a length encoding; longer vectors pass one.
instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const uint lanes = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    if (lanes != 2) {
      const int enc = vector_length_encoding(this);
      __ vabsnegd(ideal_op, result, input, enc);
    } else {
      __ vabsnegd(ideal_op, result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22424
22425 //------------------------------------- VectorTest --------------------------------------------
22426
// VectorTest on vector operands narrower than 16 bytes; sets the flags
// register. A vector temp is handed to vectortest() for this narrow case.
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $src1);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ vectortest(lane_bt, $src1$$XMMRegister, $src2$$XMMRegister, scratch, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
22439
// VectorTest on vector operands of 16 bytes or more; sets the flags
// register. No temp is needed, so xnoreg is passed to vectortest().
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $src1);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(lane_bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
22451
// All-true VectorTest (BoolTest::overflow) on an opmask with fewer than 8
// lanes, or exactly 8 lanes when AVX512DQ is missing. The mask is copied to
// a GPR, trimmed to its low masklen bits and compared against the all-ones
// pattern of that width; only src1 is read.
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this, $src1);
    const int all_lanes_set = (1 << lanes) - 1;
    Register bits = $tmp$$Register;
    __ kmovwl(bits, $src1$$KRegister);
    __ andl(bits, all_lanes_set);
    __ cmpl(bits, all_lanes_set);
  %}
  ins_pipe( pipe_slow );
%}
22467
// Any-true VectorTest (BoolTest::ne) on an opmask with fewer than 8 lanes,
// or exactly 8 lanes when AVX512DQ is missing. The mask is copied to a GPR
// and ANDed with the low masklen bits; the flags produced by that AND are
// the result. Only src1 is read.
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this, $src1);
    const int lane_bits = (1 << lanes) - 1;
    Register bits = $tmp$$Register;
    __ kmovwl(bits, $src1$$KRegister);
    __ andl(bits, lane_bits);
  %}
  ins_pipe( pipe_slow );
%}
22482
// VectorTest on an opmask with 16 or more lanes, or exactly 8 lanes when
// AVX512DQ is available. Emits kortest of src1 with itself, sized by the
// mask length; only src1 is read.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this, $src1);
    KRegister tested = $src1$$KRegister;
    __ kortest(lanes, tested, tested);
  %}
  ins_pipe( pipe_slow );
%}
22494
22495 //------------------------------------- LoadMask --------------------------------------------
22496
// VectorLoadMask producing a plain vector mask (result is not a pvectmask)
// on CPUs without AVX512VL+BW. dst is a TEMP; the expansion is done by
// load_vector_mask() for the result's size and lane type.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, size_in_bytes, lane_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
22509
// VectorLoadMask producing an opmask on CPUs without AVX512VL+BW. The
// helper is always invoked with the 512-bit length encoding and its boolean
// argument set to true.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, scratch, true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
22521
// VectorLoadMask producing an opmask on CPUs with AVX512VL+BW. The length
// encoding is derived from the input vector, and the helper's boolean
// argument is false.
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    const int src_enc = vector_length_encoding(in(1));
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, scratch, false, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
22534
22535 //------------------------------------- StoreMask --------------------------------------------
22536
// VectorStoreMask for 1-byte mask elements held in a plain vector, fewer
// than 64 lanes. A byte-wise absolute value is applied to the mask. The
// legacy pabsb form is used for up to 16 lanes when UseAVX <= 2; otherwise
// vpabsb with the length encoding of the source.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    const bool legacy_form = (lanes <= 16) && (UseAVX <= 2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    if (legacy_form) {
      assert(UseSSE >= 3, "required");
      __ pabsb(result, input);
    } else {
      assert(UseAVX > 0, "required");
      const int src_enc = vector_length_encoding(this, $src);
      __ vpabsb(result, input, src_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22554
// VectorStoreMask for 2-byte mask elements held in a plain vector, up to
// 16 lanes. Up to 8 lanes: word absolute value, then pack to bytes against
// a zeroed temp. More lanes: extract the upper 128 bits, pack both halves
// to bytes, then take the byte absolute value.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    if (lanes > 8) {
      assert(UseAVX > 0, "required");
      const int enc = Assembler::AVX_128bit;
      __ vextracti128(result, input, 0x1);
      __ vpacksswb(result, input, result, enc);
      __ vpabsb(result, result, enc);
    } else {
      assert(UseSSE >= 3, "required");
      XMMRegister zero = $xtmp$$XMMRegister;
      __ pxor(zero, zero);
      __ pabsw(result, input);
      __ packuswb(result, zero);
    }
  %}
  ins_pipe( pipe_slow );
%}
22577
// VectorStoreMask for 4-byte elements, at most 8 lanes, UseAVX <= 2, where
// the input is not typed as a pvectmask. Narrows double words to words and
// then to bytes.
// NOTE(review): presumably source lanes are 0 or -1 and result bytes are
// 0 or 1 - confirm.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 4) {
      assert(UseAVX > 0, "required");
      const int enc128 = Assembler::AVX_128bit;
      // xtmp = 0; it pads the final word-to-byte pack.
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, enc128);
      // dst receives the upper 128 bits of the source.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, enc128);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, enc128);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, enc128);
    } else {
      assert(UseSSE >= 3, "required");
      // xtmp = 0; it pads both pack steps.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22603
// VectorStoreMask for 8-byte elements, exactly 2 lanes, UseAVX <= 2.
// Collects one double word per quad word lane and narrows it to a byte.
// NOTE(review): presumably source lanes are 0 or -1 and result bytes are
// 0 or 1 - confirm.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister zero   = $xtmp$$XMMRegister;
    __ pxor(zero, zero);
    // Selector 0x8 moves source double words 0 and 2 into the two lowest slots.
    __ pshufd(result, $src$$XMMRegister, 0x8);
    __ pabsd(result, result);
    // Double words -> words -> bytes, padding with the zero register.
    __ packusdw(result, zero);
    __ packuswb(result, zero);
  %}
  ins_pipe( pipe_slow );
%}
22619
// VectorStoreMask for 8-byte elements, exactly 4 lanes, UseAVX <= 2.
// Compacts one double word per quad word lane into the low 128 bits of dst,
// then narrows double words to words to bytes and takes the absolute value.
// NOTE(review): presumably source lanes are 0 or -1 and result bytes are
// 0 or 1 - confirm.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    // Within each 128-bit half, pick the even double words of src (selector 0x88).
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    // vtmp = upper 128 bits of the shuffled value.
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    // Blend mask 0xC takes elements 2 and 3 from vtmp and keeps elements 0 and 1 of dst.
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    // vtmp is reused as a zero register for the two pack steps.
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22637
// VectorStoreMask for 4-byte elements when UseAVX > 2 and the input is not
// typed as a pvectmask. Down-converts double words to bytes, then takes the
// packed-byte absolute value.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int result_enc = vector_length_encoding(this);
    // Without AVX512VL the down-convert is issued with the 512-bit encoding.
    const int convert_enc = VM_Version::supports_avx512vl() ? natural_src_enc : Assembler::AVX_512bit;
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, convert_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22653
// VectorStoreMask for 8-byte elements when UseAVX > 2 and the input is not
// typed as a pvectmask. Down-converts quad words to bytes, then takes the
// packed-byte absolute value.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int result_enc = vector_length_encoding(this);
    // Without AVX512VL the down-convert is issued with the 512-bit encoding.
    const int convert_enc = VM_Version::supports_avx512vl() ? natural_src_enc : Assembler::AVX_512bit;
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, convert_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22669
// VectorStoreMask from a mask register (input typed as pvectmask) on CPUs
// that lack AVX512VL+BW. Works at double word granularity on a full 512-bit
// vector, then down-converts to bytes.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    // Only the 64-byte mask shape is expected on this path.
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    // Masked load of a constant table under the mask.
    // NOTE(review): presumably the table holds int 1 per lane and the false
    // argument selects zeroing of unselected lanes - confirm.
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    // Narrow each double word to a byte.
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
22683
// VectorStoreMask from a mask register (input typed as pvectmask) on CPUs
// with AVX512VL+BW: expand the mask to bytes, then take the absolute value.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    const XMMRegister bytes = $dst$$XMMRegister;
    // Mask bits become byte lanes first.
    __ evpmovm2b(bytes, $mask$$KRegister, result_enc);
    __ vpabsb(bytes, bytes, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22696
// VectorMaskCast on a mask register: input and output share the same
// operand, the cost is zero and no code is emitted.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}
22706
// VectorMaskCast on a vector register when the input and output vectors
// have the same size in bytes: done in place, zero cost, no code emitted.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}
22717
// VectorMaskCast on vector registers when the input and output vectors
// differ in size in bytes; the conversion is delegated to the macro
// assembler helper vector_mask_cast.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    const BasicType to_bt   = Matcher::vector_element_basic_type(this);
    const BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
    const int lane_cnt = Matcher::vector_length(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_bt, from_bt, lane_cnt);
  %}
  ins_pipe(pipe_slow);
%}
22730
22731 //-------------------------------- Load Iota Indices ----------------------------------
22732
// VectorLoadConst with a zero immediate: loads the iota index constant for
// this node's element type and vector size via load_iota_indices.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, size_in_bytes, elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
22743
// PopulateIndex with an int start value (src1) and a step fixed at 1:
// broadcasts the start value, loads the iota indices and adds the two.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    // vtmp = start value replicated into every lane
    __ vpbroadcast(bt, $vtmp$$XMMRegister, $src1$$Register, enc);
    // dst = iota indices, then dst += vtmp
    __ load_iota_indices($dst$$XMMRegister, size_in_bytes, bt);
    __ vpadd(bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22759
// PopulateIndex with a long start value (src1) and a step fixed at 1:
// broadcasts the start value, loads the iota indices and adds the two.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    // vtmp = start value replicated into every lane
    __ vpbroadcast(bt, $vtmp$$XMMRegister, $src1$$Register, enc);
    // dst = iota indices, then dst += vtmp
    __ load_iota_indices($dst$$XMMRegister, size_in_bytes, bt);
    __ vpadd(bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22775
22776 //-------------------------------- Rearrange ----------------------------------
22777
22778 // LoadShuffle/Rearrange for Byte
// VectorRearrange for byte vectors with fewer than 32 lanes: a single
// in-place byte shuffle of dst controlled by the shuffle operand.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22790
// VectorRearrange for 32-lane byte vectors without AVX512_VBMI.
// Shuffles the source twice (once as is, once with its 128-bit halves
// swapped) and blends the two results under a mask derived from the shuffle.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
22812
22813
// VectorRearrange for byte vectors with more than 32 lanes without
// AVX512_VBMI; the work is delegated to the macro assembler helper
// rearrange_bytes, which needs three vector temps, a mask temp and a
// general purpose temp.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  // The comment marker is written as tab followed by '!', matching the other
  // instructs in this file.
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22828
// VectorRearrange for byte vectors with at least 32 lanes when AVX512_VBMI
// is available: a single byte permute.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // The shuffle register supplies the indices, src supplies the data.
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22840
22841 // LoadShuffle/Rearrange for Short
22842
// VectorLoadShuffle for short vectors without AVX512BW.
// Turns every short index i into the byte pair (2*i, 2*i + 1) so that a
// byte shuffle can be used for the rearrange. Has an SSE path (UseAVX == 0)
// and a VEX path that performs the same steps in three-operand form.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      // Vectors wider than 16 bytes need UseAVX > 1 on this path.
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22883
// VectorRearrange for short vectors with at most 8 lanes without AVX512BW:
// a single in-place byte shuffle of dst.
// NOTE(review): presumably the shuffle operand already holds byte indices
// (see loadShuffleS) - confirm.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22895
// VectorRearrange for 16-lane short vectors without AVX512BW.
// Shuffles the source twice (once as is, once with its 128-bit halves
// swapped) and blends the two results under a mask derived from the shuffle.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
22917
// VectorRearrange for short vectors when AVX512BW is available: a single
// word permute.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int natural_enc = vector_length_encoding(this);
    // Without AVX512VL the permute is issued with the 512-bit encoding.
    const int enc = VM_Version::supports_avx512vl() ? natural_enc : Assembler::AVX_512bit;
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22932
22933 // LoadShuffle/Rearrange for Integer and Float
22934
// VectorLoadShuffle for 4-lane int/float vectors when UseAVX == 0.
// Expands every int index i into the four byte indices 4*i .. 4*i + 3 so
// that a byte shuffle can be used for the rearrange.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    // (dst is reloaded with the constant; vtmp holds the replicated indices)
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22964
// VectorRearrange for int/float vectors when UseAVX == 0: a single in-place
// byte shuffle of dst.
// NOTE(review): presumably the shuffle operand already holds byte indices
// (see loadShuffleI) - confirm.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22976
// VectorRearrange for int/float vectors when UseAVX > 0; delegated to the
// macro assembler helper vector_rearrange_int_float.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    __ vector_rearrange_int_float(elem_bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22989
22990 // LoadShuffle/Rearrange for Long and Double
22991
// VectorLoadShuffle for long/double vectors with fewer than 8 lanes and no
// AVX512VL. Expands every quad word index i into the double word pair
// (2*i, 2*i + 1) so that a double word permute can do the rearrange.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23017
// VectorRearrange for long/double vectors with fewer than 8 lanes and no
// AVX512VL: performed with a double word permute.
// NOTE(review): presumably the shuffle operand already holds double word
// indices (see loadShuffleL) - confirm.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int enc = vector_length_encoding(this);
    // The shuffle register supplies the indices, src supplies the data.
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23031
// VectorRearrange for long/double vectors with 8 lanes, or any lane count
// when AVX512VL is available: a single quad word permute.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int natural_enc = vector_length_encoding(this);
    // A 128-bit vector is widened to the 256-bit encoding for the permute.
    const int enc = (natural_enc == Assembler::AVX_128bit) ? Assembler::AVX_256bit : natural_enc;
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23048
23049 // --------------------------------- FMA --------------------------------------
23050 // a * b + c
23051
// Packed float fused multiply-add, all operands in registers:
// c = a * b + c. The accumulator c is both input and result.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF  c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const int enc = vector_length_encoding(this);
    const XMMRegister acc = $c$$XMMRegister;
    __ vfmaf(acc, $a$$XMMRegister, $b$$XMMRegister, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23063
// Packed float fused multiply-add with the multiplicand b loaded from
// memory: c = a * b + c. Only matched when the first input of the node is
// wider than 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF  c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const int enc = vector_length_encoding(this);
    const XMMRegister acc = $c$$XMMRegister;
    __ vfmaf(acc, $a$$XMMRegister, $b$$Address, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23076
// Packed double fused multiply-add, all operands in registers:
// c = a * b + c. The accumulator c is both input and result.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD  c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const int enc = vector_length_encoding(this);
    const XMMRegister acc = $c$$XMMRegister;
    __ vfmad(acc, $a$$XMMRegister, $b$$XMMRegister, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23088
// Packed double fused multiply-add with the multiplicand b loaded from
// memory: c = a * b + c. Only matched when the first input of the node is
// wider than 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD  c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const int enc = vector_length_encoding(this);
    const XMMRegister acc = $c$$XMMRegister;
    __ vfmad(acc, $a$$XMMRegister, $b$$Address, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23101
23102 // --------------------------------- Vector Multiply Add --------------------------------------
23103
// MulAddVS2VI (packed short multiply-add producing ints) when UseAVX == 0:
// two-operand form, dst is both the first input and the result.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23113
// MulAddVS2VI (packed short multiply-add producing ints) when UseAVX > 0:
// three-operand form with a separate destination.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23124
23125 // --------------------------------- Vector Multiply Add Add ----------------------------------
23126
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into one instruction when
// AVX512_VNNI is available; dst is the accumulator. The low ins_cost makes
// this form preferred over the separate multiply-add and add.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
23139
23140 // --------------------------------- PopCount --------------------------------------
23141
// PopCountVI / PopCountVL, unmasked, for element types accepted by
// is_vector_popcount_predicate; delegated to vector_popcount_integral_evex
// with k0 as the mask argument.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    // Element type and encoding are both taken from the source operand.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23155
// PopCountVI / PopCountVL under a mask register, for element types accepted
// by is_vector_popcount_predicate.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int src_enc = vector_length_encoding(this, $src);
    // dst starts out as a copy of src.
    // NOTE(review): presumably so that lanes not selected by the mask keep
    // the source value (merge flag is true) - confirm.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, src_enc);
    __ vector_popcount_integral_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
23169
// PopCountVI / PopCountVL fallback for element types rejected by
// is_vector_popcount_predicate; delegated to vector_popcount_integral, which
// needs two vector temps and a general purpose temp.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    // Element type and encoding are both taken from the source operand.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23185
23186 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23187
// CountTrailingZerosV for inputs accepted by
// is_clz_non_subword_predicate_evex; delegated to
// vector_count_trailing_zeros_evex. Only one vector temp is needed here, so
// the other temp slots are passed as xnoreg and the mask slot as k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  // The comment marker is written as tab followed by '!', matching the other
  // instructs in this file.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23203
// CountTrailingZerosV for short elements when AVX512CD is available and
// either AVX512VL is present or the vector is 64 bytes wide; delegated to
// vector_count_trailing_zeros_evex with three vector temps (one temp slot
// is passed as xnoreg, the mask slot as k0).
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  // The comment marker is written as tab followed by '!', matching the other
  // instructs in this file.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23220
// CountTrailingZerosV for byte elements when AVX512VL+BW is available;
// delegated to vector_count_trailing_zeros_evex with four vector temps, a
// mask temp and a general purpose temp.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  // The comment marker is written as tab followed by '!', matching the other
  // instructs in this file.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23236
// CountTrailingZerosV when AVX512VL is absent and the input is narrower
// than 64 bytes; delegated to vector_count_trailing_zeros_avx with three
// vector temps and a general purpose temp.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    // Element type and encoding are both taken from the source operand.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int src_enc = vector_length_encoding(this, $src);
    __ vector_count_trailing_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
23250
23251
23252 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23253
// MacroLogicV with three register inputs: one ternary-logic instruction
// whose truth table is the 8-bit immediate func. dst is the first input and
// the result.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23264
// MacroLogicV with the third input loaded from memory. Only matched when
// the first input (dst) is wider than 8 bytes.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
23276
23277 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate (RotateLeftV or RotateRightV) by an 8-bit immediate count.
// The ideal opcode of the matched node is forwarded to the helper.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    // Element type comes from the result vector type of this node.
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    int ideal_opc = this->ideal_Opcode();
    __ vprotate_imm(ideal_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                    $shift$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23290
// Vector rotate (RotateLeftV or RotateRightV) where the per-lane counts are
// supplied in a vector register.
// NOTE(review): the instruct name reads like a typo of "vprotate"; it is kept
// because the name is part of the generated interface.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    // Element type comes from the result vector type of this node.
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    int ideal_opc = this->ideal_Opcode();
    __ vprotate_var(ideal_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                    $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23303
23304 // ---------------------------------- Masked Operations ------------------------------------
// LoadVectorMasked with the mask held in a vector register. Selected when the
// type of input 3 (the mask) is not a predicate vector mask.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    // Both values describe the loaded (result) vector.
    int load_vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elem_bt, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, load_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23316
23317
// LoadVectorMasked with the mask held in an opmask (k) register. Selected when
// the type of input 3 (the mask) is a predicate vector mask.
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    // Both values describe the loaded (result) vector.
    int load_vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    // The boolean argument is passed as false for the load form.
    __ evmovdqu(elem_bt, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address,
                false, load_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23329
// StoreVectorMasked with the mask held in a vector register. Selected when the
// mask input (n->in(3)->in(2)) is not a predicate vector mask.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    // Vector shape is read from the node that feeds the src operand.
    const MachNode* stored_value = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elem_bt = stored_value->bottom_type()->is_vect()->element_basic_type();
    int store_vlen_enc = vector_length_encoding(stored_value);
    __ vmovmask(elem_bt, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, store_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23342
// StoreVectorMasked with the mask held in an opmask (k) register. Selected when
// the mask input (n->in(3)->in(2)) is a predicate vector mask.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    // Vector shape is read from the node that feeds the src operand.
    const MachNode* stored_value = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int store_vlen_enc = vector_length_encoding(stored_value);
    BasicType elem_bt = stored_value->bottom_type()->is_vect()->element_basic_type();
    // The boolean argument is passed as true for the store form.
    __ evmovdqu(elem_bt, $mask$$KRegister, $mem$$Address, $src$$XMMRegister,
                true, store_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23355
// Checks that the bits of addr selected by the immediate mask are all zero and
// stops the VM with a diagnostic message otherwise. addr is passed through
// unchanged; the flags register is killed by the test.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label aligned;
    // A zero result of the test means none of the masked address bits are set.
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, aligned);
    // Reached only for a misaligned address.
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(aligned);
  %}
  ins_pipe(pipe_slow);
%}
23370
// VectorCmpMasked: compares src1 and src2 for equality under the given mask.
// dst is left at -1 when the early-out branch is taken; otherwise it receives
// the count of trailing zero bits of the inverted comparison result.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    BasicType cmp_bt = Matcher::vector_element_basic_type(this, $src1);
    int cmp_vlen_enc = vector_length_encoding(this, $src1);

    // Both sources must agree in shape and element type.
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label all_equal;

    // ktmp2 = inverted mask, dst preset to -1 for the early-out case.
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    __ mov64($dst$$Register, -1L);
    // ktmp1 = lanes that compare equal, restricted to the mask.
    __ evpcmp(cmp_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, cmp_vlen_enc);
    // KORTEST sets the carry flag when the OR of both operands is all ones
    // (per the Intel SDM); in that case dst keeps its preset value.
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, all_equal);
    // Otherwise: dst = tzcnt of the inverted comparison result.
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(all_equal);
  %}
  ins_pipe( pipe_slow );
%}
23395
23396
// VectorMaskGen with the length in a general-purpose register. The work is
// delegated to genmask, which is given one long temporary; flags are killed.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    // Argument order: destination mask, length register, scratch register.
    __ genmask($dst$$KRegister,
               $len$$Register,
               $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
23406
// VectorMaskGen with a compile-time constant length. For a positive length the
// low len bits are materialized through the temp register and moved into the
// mask; for any other length the mask is cleared by xor-ing it with itself.
// The format string now also prints the destination mask register, in line
// with the register-length variant vmask_gen.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    if ($len$$constant > 0) {
      // right_n_bits yields a value with the requested number of low bits set.
      __ mov64($temp$$Register, right_n_bits($len$$constant));
      __ kmovql($dst$$KRegister, $temp$$Register);
    } else {
      // Non-positive length: produce an all-zero mask without using temp.
      __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
23421
// VectorMaskToLong for a mask that lives in an opmask (k) register, i.e. when
// the input type is a predicate vector mask. dst doubles as the temporary
// register argument of the helper.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the element size in bytes.
    int mask_bytes = lane_count * type2aelembytes(mask_bt);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister, $dst$$Register,
                             lane_count, mask_bytes, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23438
// VectorMaskToLong for a mask held in a vector register, i.e. when the input
// type is not a predicate vector mask. dst doubles as the general-purpose
// temporary argument; xtmp is a vector temporary.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, lane_count, mask_bt, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23454
// VectorMaskToLong applied directly to the input of a VectorStoreMask, for a
// vector-register mask (the inner input is not a predicate vector mask).
// The size operand is matched but not used by the encoding.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, lane_count, mask_bt, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23470
// VectorMaskTrueCount for a mask that lives in an opmask (k) register, i.e.
// when the input type is a predicate vector mask.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the element size in bytes.
    int mask_bytes = lane_count * type2aelembytes(mask_bt);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister, $tmp$$Register,
                             lane_count, mask_bytes, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23487
// VectorMaskTrueCount for a mask held in a vector register, i.e. when the
// input type is not a predicate vector mask.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, mask_bt, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23503
// VectorMaskTrueCount applied directly to the input of a VectorStoreMask, for
// a vector-register mask (the inner input is not a predicate vector mask).
// The size operand is matched but not used by the encoding.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, mask_bt, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23519
// VectorMaskFirstTrue / VectorMaskLastTrue for a mask that lives in an opmask
// (k) register. The ideal opcode forwarded to the helper identifies which of
// the two match rules applied.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the element size in bytes.
    int mask_bytes = lane_count * type2aelembytes(mask_bt);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister, $tmp$$Register,
                             lane_count, mask_bytes, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23537
// VectorMaskFirstTrue / VectorMaskLastTrue for a mask held in a vector
// register (input type is not a predicate vector mask).
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, mask_bt, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23554
// VectorMaskFirstTrue / VectorMaskLastTrue applied directly to the input of a
// VectorStoreMask, for a vector-register mask (the inner input is not a
// predicate vector mask). The size operand is matched but not used by the
// encoding.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // Shape information is taken from the mask operand.
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_vlen_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, mask_bt, mask_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23571
23572 // --------------------------------- Compress/Expand Operations ---------------------------
// CompressV / ExpandV for targets without AVX512VL, limited by the predicate to
// result vectors of at most 32 bytes. The mask is held in a vector register;
// two general-purpose and two vector temporaries are handed to the helper.
// The format string was normalized to "\t! using" (a space was missing after
// the comment marker).
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    // The ideal opcode is forwarded so the helper receives which rule matched.
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23588
// CompressV / ExpandV with the mask in an opmask (k) register. Selected when
// AVX512VL is available or the result vector is exactly 64 bytes.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    // Shape information describes the result vector of this node.
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    int ideal_opc = this->ideal_Opcode();
    // The boolean argument is passed as false.
    __ vector_compress_expand(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister,
                              false, elem_bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23602
// CompressM on an opmask (k) register, using two long temporaries. The input
// is asserted to be a predicate vector mask.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
    // Number of lanes of the result mask.
    int lane_count = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister,
                            $rtmp1$$Register, $rtmp2$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
23614
23615 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23616
// ReverseV for targets without GFNI support. Two vector temporaries and one
// general-purpose temporary are handed to the helper.
// The format string was corrected from "$src!\t using" to "$src\t! using" so
// the comment marker is no longer glued to the source operand.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    // Shape information describes the result vector of this node.
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23630
// ReverseV for targets with GFNI support. A 64-bit constant is placed in the
// constant table and its address is handed to the helper together with one
// vector temporary.
// The format string was corrected from "$src!\t using" to "$src\t! using" so
// the comment marker is no longer glued to the source operand.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    // Shape information describes the result vector of this node.
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // NOTE(review): presumably the GF(2) affine matrix that reverses the bits
    // of each byte; confirm against vector_reverse_bit_gfni.
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23645
// ReverseBytesV without extra temporaries. Selected when AVX512BW is available
// or the result vector is shorter than 64 bytes.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    // Shape information describes the result vector of this node.
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23658
// ReverseBytesV for 64-byte result vectors on targets without AVX512BW. Two
// vector temporaries and one general-purpose temporary are handed to the
// helper.
// The format string was corrected from "$src!\t using" to "$src\t! using" so
// the comment marker is no longer glued to the source operand.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    // Shape information describes the result vector of this node.
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23672
23673 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
23674
// Unmasked CountLeadingZerosV for the cases accepted by
// is_clz_non_subword_predicate_evex (evaluated on the input element type and
// byte length). No temporaries are needed, so placeholder registers are passed.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    // Element type and length encoding are both taken from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int src_vlen_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, k0, noreg, true, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23688
// Masked CountLeadingZerosV for the cases accepted by
// is_clz_non_subword_predicate_evex. src is first copied into dst, then the
// helper is invoked with the opmask register.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    // Element type and length encoding are both taken from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int src_vlen_enc = vector_length_encoding(this, $src);
    // Seed dst with src before the masked operation.
    // NOTE(review): presumably so that lanes not selected by the mask keep the
    // source value; confirm against vector_count_leading_zeros_evex.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg,
                                       $mask$$KRegister, noreg, true, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23703
// CountLeadingZerosV for T_SHORT elements. Requires AVX512CD and either
// AVX512VL or a 64-byte result vector. Two vector temporaries are handed to
// the helper; the remaining temporary slots receive placeholder registers.
// The format string was corrected from "$src!\t using" to "$src\t! using" so
// the comment marker is no longer glued to the source operand.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // Element type and length encoding are both taken from the source operand.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23719
// CountLeadingZerosV for T_BYTE elements on targets with AVX512VL+BW. Three
// vector temporaries, one opmask temporary and one general-purpose temporary
// are handed to the helper.
// The format string was corrected from "$src!\t using" to "$src\t! using" so
// the comment marker is no longer glued to the source operand.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    // Element type and length encoding are both taken from the source operand.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23734
// CountLeadingZerosV for T_INT elements on targets without AVX512VL, limited to
// input vectors shorter than 64 bytes. Only vector temporaries are needed; the
// general-purpose temporary slot of the helper receives noreg.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    // Element type and length encoding are both taken from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int src_vlen_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                      noreg, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23749
// CountLeadingZerosV for every element type other than T_INT on targets
// without AVX512VL, limited to input vectors shorter than 64 bytes. Three
// vector temporaries and one general-purpose temporary are handed to the
// helper.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    // Element type and length encoding are both taken from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int src_vlen_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                      $rtmp$$Register, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23764
23765 // ---------------------------------- Vector Masked Operations ------------------------------------
23766
// Masked vector add for all element types (byte, short, int, long, float,
// double), register form. dst is both the first source and the result.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    // The ideal opcode tells evmasked_op which of the match rules applied.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23784
// Masked vector add for all element types, memory form: the second source is
// folded in as a LoadVector. dst is both the first source and the result.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    // The ideal opcode tells evmasked_op which of the match rules applied.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23802
// Masked vector XOR, register form. dst is both the first source and the
// result.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23815
// Masked vector XOR, memory form: the second source is folded in as a
// LoadVector. dst is both the first source and the result.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23828
// Masked vector OR, register form. dst is both the first source and the
// result.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23841
// Masked vector OR, memory form: the second source is folded in as a
// LoadVector. dst is both the first source and the result.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23854
// Masked vector AND, register form. dst is both the first source and the
// result.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23867
// Masked vector AND, memory form: the second source is folded in as a
// LoadVector. dst is both the first source and the result.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23880
// Masked vector subtract for all element types (byte, short, int, long, float,
// double), register form. dst is both the first source and the result.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    // The ideal opcode tells evmasked_op which of the match rules applied.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23898
// Masked vector subtract for all element types, memory form: the second source
// is folded in as a LoadVector. dst is both the first source and the result.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    // The ideal opcode tells evmasked_op which of the match rules applied.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23916
// Masked vector multiply for short, int, long, float and double elements (there
// is no byte rule), register form. dst is both the first source and the result.
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    // The ideal opcode tells evmasked_op which of the match rules applied.
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23933
// Masked vector multiply with the second operand folded from a vector load.
// Covers MulVS, MulVI, MulVL, MulVF and MulVD; dst doubles as first input.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    // The memory operand is handed to evmasked_op as an Address.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23950
// Masked vector square root (SqrtVF / SqrtVD), computed in place on dst.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    // Unary operation: dst is supplied for every vector operand slot of evmasked_op.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23964
// Masked floating-point vector divide (DivVF / DivVD), register-register form.
// dst is the dividend and receives the result.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23978
// Masked floating-point vector divide (DivVF / DivVD) with the divisor folded
// from a vector load. dst is the dividend and receives the result.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23992
23993
// Masked vector rotate by an 8-bit immediate count.
// Handles both RotateLeftV and RotateRightV; the direction is conveyed to
// evmasked_op through the ideal opcode.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24007
// Masked vector rotate with the rotate counts held in a vector register.
// Handles both RotateLeftV and RotateRightV via the ideal opcode.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24021
// Masked vector left shift by an 8-bit immediate wrapped in LShiftCntV.
// Covers LShiftVS, LShiftVI and LShiftVL.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24036
// Masked vector left shift with the count in a vector register.
// Selected only when the shift node reports is_var_shift() == false; the
// trailing 'false' argument to evmasked_op mirrors that predicate.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24052
// Masked vector left shift, variable-shift flavour.
// Selected only when the shift node reports is_var_shift() == true; the
// trailing 'true' argument to evmasked_op mirrors that predicate.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24068
// Masked vector right shift (RShiftVS / RShiftVI / RShiftVL) by an 8-bit
// immediate wrapped in RShiftCntV.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24083
// Masked vector right shift (RShiftVS / RShiftVI / RShiftVL) with the count
// in a vector register. Applies when is_var_shift() is false, and passes
// 'false' as the last evmasked_op argument accordingly.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24099
// Masked vector right shift (RShiftVS / RShiftVI / RShiftVL), variable-shift
// flavour. Applies when is_var_shift() is true, and passes 'true' as the
// last evmasked_op argument accordingly.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24115
// Masked vector unsigned right shift (URShiftVS / URShiftVI / URShiftVL) by an
// 8-bit immediate. The count node matched here is RShiftCntV.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24130
// Masked vector unsigned right shift with the count in a vector register.
// Applies when is_var_shift() is false, and passes 'false' as the last
// evmasked_op argument accordingly.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24146
// Masked vector unsigned right shift, variable-shift flavour.
// Applies when is_var_shift() is true, and passes 'true' as the last
// evmasked_op argument accordingly.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24162
// Masked vector maximum (MaxV), register-register form; dst is first input
// and result.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24175
// Masked vector maximum (MaxV) with the second operand folded from a vector
// load; dst is first input and result.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24188
// Masked vector minimum (MinV), register-register form; dst is first input
// and result.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24201
// Masked vector minimum (MinV) with the second operand folded from a vector
// load; dst is first input and result.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24214
// Masked VectorRearrange, register-register form.
// Unlike the neighbouring masked arithmetic rules, this one hands 'false'
// (not 'true') to evmasked_op in the boolean slot before the length encoding.
// NOTE(review): presumably this selects zero-masking instead of merge-masking
// -- confirm against the evmasked_op declaration.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24227
// Masked integral vector absolute value (AbsVB / AbsVS / AbsVI / AbsVL),
// computed in place on dst.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    // Unary operation: dst is supplied for every vector operand slot of evmasked_op.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24243
// Masked fused multiply-add (FmaVF / FmaVD) with all vector operands in
// registers. dst is one of the inputs and also receives the result; the mask
// is paired with src3 in the match tree.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Note the operand order: dst, src2, src3 (dst is not repeated here).
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24258
// Masked fused multiply-add (FmaVF / FmaVD) with the third operand folded
// from a vector load. dst is one of the inputs and also receives the result.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Operand order: dst, src2, then the memory operand as an Address.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24273
// Masked vector compare that writes its result into a mask register.
// The condition is an immediate; the compare instruction is chosen from the
// element type of src1. Integral element types translate the condition with
// booltest_pred_to_comparison_pred and pass the negated "unsigned" test as a
// separate flag; float/double use booltest_pred_to_comparison_pred_fp.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    // Length encoding and element type are derived from src1, not from the result.
    const int vec_enc = vector_length_encoding(this, $src1);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Dispatch on the element type of the compared vectors.
    switch (elem_bt) {
      case T_BYTE: {
        const bool unsigned_cmp = Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                   pred, !unsigned_cmp, vec_enc);
        break;
      }
      case T_SHORT: {
        const bool unsigned_cmp = Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                   pred, !unsigned_cmp, vec_enc);
        break;
      }
      case T_INT: {
        const bool unsigned_cmp = Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                   pred, !unsigned_cmp, vec_enc);
        break;
      }
      case T_LONG: {
        const bool unsigned_cmp = Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                   pred, !unsigned_cmp, vec_enc);
        break;
      }
      case T_FLOAT: {
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                   fp_pred, vec_enc);
        break;
      }
      case T_DOUBLE: {
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                   fp_pred, vec_enc);
        break;
      }
      default:
        // Any other element type is unexpected; report its name in debug builds.
        assert(false, "%s", type2name(elem_bt));
        break;
    }
  %}
  ins_pipe( pipe_slow );
%}
24323
// MaskAll from an int register into a mask register, for vectors of at most
// 32 lanes. The lane count is forwarded to vector_maskall_operation.
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
24334
// Mask NOT expressed as XorVMask with MaskAll(-1), for masks shorter than
// 8 lanes when AVX512DQ is available. Uses a general register and a mask
// register as temporaries, and dst is declared TEMP_DEF.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister,
            $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24346
// Mask NOT expressed as XorVMask with MaskAll(-1), temp-free form.
// Applies to 8 lanes with AVX512DQ, to exactly 16 lanes, and to more than
// 16 lanes with AVX512BW.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24359
// VectorLongToMask producing a vector-register mask (result type is not a
// predicate mask type), for at most 8 lanes. Two long registers serve as
// temporaries; no vector temporary is passed (xnoreg).
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    // The length encoding is derived from the lane count itself.
    int lane_count = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              xnoreg, lane_count, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24373
24374
// VectorLongToMask producing a vector-register mask (result type is not a
// predicate mask type), for more than 8 lanes. Needs an extra vector
// temporary and kills the flags register; at most 32 lanes are expected.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    int lane_count = Matcher::vector_length(this);
    assert(lane_count <= 32, "invalid mask length");
    // The length encoding is derived from the lane count itself.
    int vec_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              $xtmp1$$XMMRegister, lane_count, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24389
// VectorLongToMask when the result is a predicate (mask-register) type:
// a single kmov from the long register into the mask register.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    // Straight register-to-mask move; no temporaries are needed.
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24399
// Logical AND / OR / XOR of two mask registers (AndVMask, OrVMask, XorVMask).
// The concrete operation is chosen inside masked_op from the ideal opcode.
// kscratch is reserved as a TEMP but is not referenced by the encoding below.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    // Debug-only sanity check: both inputs must carry the same mask type.
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");

    // Without AVX512DQ, masks shorter than 16 lanes are processed as 16 lanes.
    uint lane_count = Matcher::vector_length(this);
    if (lane_count < 16 && !VM_Version::supports_avx512dq()) {
      lane_count = 16;
    }
    __ masked_op(this->ideal_Opcode(), lane_count,
                 $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24416
// Masked three-input logic operation (MacroLogicV) with all vector operands
// in registers. func is an unsigned 8-bit immediate handed to evpternlog;
// dst is the first input and the result.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister,
                  true, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24428
// Masked three-input logic operation (MacroLogicV) with the third operand
// declared as a memory operand and passed to evpternlog as an Address.
// NOTE(review): unlike the other *_mem_masked rules in this section, the match
// tree references src3 directly rather than through (LoadVector src3).
// Confirm whether this rule can actually be selected by the matcher, or
// whether a LoadVector wrapper (plus any needed size predicate) is missing.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address,
                  true, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24440
// CastVV on a mask register: a zero-size, zero-cost rule that emits no code.
// The format text says "castVV" even though this is the mask-register variant.
instruct castMM(kReg dst) %{
  match(Set dst (CastVV dst));

  // No bytes are emitted for this rule.
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
24451
// CastVV on a vector register: a zero-size, zero-cost rule that emits no code.
instruct castVV(vec dst) %{
  match(Set dst (CastVV dst));

  // No bytes are emitted for this rule.
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
24462
// CastVV on a legVec register operand: a zero-size, zero-cost rule that emits
// no code.
instruct castVVLeg(legVec dst) %{
  match(Set dst (CastVV dst));

  // No bytes are emitted for this rule.
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
24473
// IsInfiniteF: classify a scalar float with vfpclassss into a temporary mask
// register, then move that mask into the int result register.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) %{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    // NOTE(review): 0x18 presumably selects the +Inf and -Inf classes in the
    // VFPCLASS immediate -- confirm against the Intel SDM.
    const int class_selector = 0x18;
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, class_selector);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
24485
// IsInfiniteD: classify a scalar double with vfpclasssd into a temporary mask
// register, then move that mask into the int result register.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) %{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    // NOTE(review): 0x18 presumably selects the +Inf and -Inf classes in the
    // VFPCLASS immediate -- confirm against the Intel SDM.
    const int class_selector = 0x18;
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, class_selector);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
24497
// Signed saturating add/sub for subword element types, register-register form.
// The predicate requires a SaturatingVector node that is not unsigned; the
// 'false' argument to vector_saturating_op corresponds to that.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    // Add versus sub is decided by the ideal opcode inside vector_saturating_op.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister,
                            false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24513
// Unsigned saturating add/sub for subword element types, register-register
// form. The predicate requires an unsigned SaturatingVector node; the 'true'
// argument to vector_saturating_op corresponds to that.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    // Add versus sub is decided by the ideal opcode inside vector_saturating_op.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister,
                            true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24529
// Signed saturating add/sub for non-subword element types, EVEX path.
// Chosen for 64-byte vectors, or for any length when AVX512VL is supported.
// Needs two vector and two mask temporaries; dst is also marked TEMP.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    // Add versus sub is conveyed through the ideal opcode.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), lane_bt,
                                        $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24549
// Signed saturating add/sub for non-subword element types, AVX path.
// Chosen for vectors of at most 32 bytes when AVX512VL is not supported.
// Needs four vector temporaries; dst is also marked TEMP.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    // Add versus sub is conveyed through the ideal opcode.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), lane_bt,
                                       $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24568
// Unsigned saturating add for non-subword element types, EVEX path.
// Chosen for 64-byte vectors, or for any length when AVX512VL is supported.
// Needs two vector temporaries and one mask temporary; dst is also TEMP.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_evex(lane_bt, $dst$$XMMRegister,
                                              $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24585
// Unsigned saturating add for non-subword element types, AVX path.
// Chosen for vectors of at most 32 bytes when AVX512VL is not supported.
// Needs three vector temporaries; dst is also TEMP.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_avx(lane_bt, $dst$$XMMRegister,
                                             $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                             $xtmp3$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24602
// Unsigned saturating subtract for non-subword element types, EVEX path.
// Chosen for 64-byte vectors, or for any length when AVX512VL is supported.
// Only a mask temporary is declared; dst is not marked TEMP in this rule.
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_evex(lane_bt, $dst$$XMMRegister,
                                              $src1$$XMMRegister, $src2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24619
// Unsigned saturating subtract for non-subword element types, AVX path.
// Chosen for vectors of at most 32 bytes when AVX512VL is not supported.
// Needs two vector temporaries; dst is also TEMP.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_avx(lane_bt, $dst$$XMMRegister,
                                             $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                             vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24636
// Signed saturating add/sub on subword-element vectors where the second
// operand is loaded directly from memory. The ideal opcode selects add vs. sub.
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    // This rule only matches signed nodes (see predicate), hence 'false'.
    const bool  is_unsigned = false;
    int         opcode      = this->ideal_Opcode();
    BasicType   bt          = Matcher::vector_element_basic_type(this);
    int         vec_enc     = vector_length_encoding(this);
    XMMRegister result      = $dst$$XMMRegister;
    XMMRegister lhs         = $src1$$XMMRegister;
    __ vector_saturating_op(opcode, bt, result, lhs, $src2$$Address, is_unsigned, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24652
// Unsigned saturating add/sub on subword-element vectors where the second
// operand is loaded directly from memory. The ideal opcode selects add vs. sub.
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    // This rule only matches unsigned nodes (see predicate), hence 'true'.
    const bool  is_unsigned = true;
    int         opcode      = this->ideal_Opcode();
    BasicType   bt          = Matcher::vector_element_basic_type(this);
    int         vec_enc     = vector_length_encoding(this);
    XMMRegister result      = $dst$$XMMRegister;
    XMMRegister lhs         = $src1$$XMMRegister;
    __ vector_saturating_op(opcode, bt, result, lhs, $src2$$Address, is_unsigned, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24668
// Masked, signed saturating add/sub on subword-element vectors (register
// operand). dst is both the first input and the result.
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int         opcode  = this->ideal_Opcode();
    BasicType   bt      = Matcher::vector_element_basic_type(this);
    int         vec_enc = vector_length_encoding(this);
    XMMRegister acc     = $dst$$XMMRegister;  // destination and first source
    XMMRegister rhs     = $src$$XMMRegister;
    KRegister   kmask   = $mask$$KRegister;
    // 'false' = signed variant (see predicate); the trailing 'true' is
    // presumably the merge-masking flag -- confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(opcode, bt, kmask, acc, acc, rhs, false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24683
// Masked, unsigned saturating add/sub on subword-element vectors (register
// operand). dst is both the first input and the result.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int         opcode  = this->ideal_Opcode();
    BasicType   bt      = Matcher::vector_element_basic_type(this);
    int         vec_enc = vector_length_encoding(this);
    XMMRegister acc     = $dst$$XMMRegister;  // destination and first source
    XMMRegister rhs     = $src$$XMMRegister;
    KRegister   kmask   = $mask$$KRegister;
    // First 'true' = unsigned variant (see predicate); the trailing 'true' is
    // presumably the merge-masking flag -- confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(opcode, bt, kmask, acc, acc, rhs, true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24698
// Masked, signed saturating add/sub on subword-element vectors where the
// second operand comes from memory. dst is both the first input and the result.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int         opcode  = this->ideal_Opcode();
    BasicType   bt      = Matcher::vector_element_basic_type(this);
    int         vec_enc = vector_length_encoding(this);
    XMMRegister acc     = $dst$$XMMRegister;  // destination and first source
    KRegister   kmask   = $mask$$KRegister;
    // 'false' = signed variant (see predicate); the trailing 'true' is
    // presumably the merge-masking flag -- confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(opcode, bt, kmask, acc, acc, $src$$Address, false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24713
// Masked, unsigned saturating add/sub on subword-element vectors where the
// second operand comes from memory. dst is both the first input and the result.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int         opcode  = this->ideal_Opcode();
    BasicType   bt      = Matcher::vector_element_basic_type(this);
    int         vec_enc = vector_length_encoding(this);
    XMMRegister acc     = $dst$$XMMRegister;  // destination and first source
    KRegister   kmask   = $mask$$KRegister;
    // First 'true' = unsigned variant (see predicate); the trailing 'true' is
    // presumably the merge-masking flag -- confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(opcode, bt, kmask, acc, acc, $src$$Address, true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24728
// SelectFromTwoVector: the index vector register is also the destination,
// so the selection result overwrites the indices.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    XMMRegister sel     = $index$$XMMRegister;  // indices in, result out
    XMMRegister first   = $src1$$XMMRegister;
    XMMRegister second  = $src2$$XMMRegister;
    BasicType   elem    = Matcher::vector_element_basic_type(this);
    int         vec_enc = vector_length_encoding(this);
    __ select_from_two_vectors_evex(elem, sel, first, second, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24740
// ReinterpretS2HF: move a value from a general-purpose register into an XMM
// register with evmovw.
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    XMMRegister xmm_dst = $dst$$XMMRegister;
    Register    gpr_src = $src$$Register;
    __ evmovw(xmm_dst, gpr_src);
  %}
  ins_pipe(pipe_slow);
%}
24750
// ReinterpretHF2S: move a value from an XMM register into a general-purpose
// register with evmovw, then narrow the result to T_SHORT.
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    Register    gpr_dst = $dst$$Register;
    XMMRegister xmm_src = $src$$XMMRegister;
    __ evmovw(gpr_dst, xmm_src);
    // Normalise the GPR to a short value (presumably a sign extension -- confirm
    // against narrow_subword_type).
    __ narrow_subword_type(gpr_dst, T_SHORT);
  %}
  ins_pipe(pipe_slow);
%}
24761
// Fused ConvF2HF + ReinterpretS2HF: the conversion result stays in an XMM
// register, so a single vcvtps2ph covers both ideal nodes.
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    XMMRegister half_out  = $dst$$XMMRegister;
    XMMRegister float_in  = $src$$XMMRegister;
    // 0x04 is the immediate control byte of vcvtps2ph (rounding control;
    // presumably "use MXCSR rounding mode" -- confirm against the ISA manual).
    __ vcvtps2ph(half_out, float_in, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
24771
// Fused ReinterpretHF2S + ConvHF2F: the half-precision input is already in an
// XMM register, so a single vcvtph2ps covers both ideal nodes.
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    XMMRegister float_out = $dst$$XMMRegister;
    XMMRegister half_in   = $src$$XMMRegister;
    __ vcvtph2ps(float_out, half_in, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
24781
// Scalar half-precision square root (SqrtHF), emitted as vsqrtsh.
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister operand = $src$$XMMRegister;
    __ vsqrtsh(result, operand);
  %}
  ins_pipe(pipe_slow);
%}
24791
// Scalar half-precision add/div/mul/sub. One rule covers all four ideal nodes;
// the ideal opcode is forwarded so efp16sh can pick the instruction.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ efp16sh(this->ideal_Opcode(), result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
24805
// Scalar half-precision min/max for CPUs with AVX10.2. No temporaries are
// needed; k0 is passed as the opmask argument.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    // The ideal opcode tells the helper whether this is a min or a max.
    __ sminmax_fp16_avx10_2(this->ideal_Opcode(), result, lhs, rhs, k0);
  %}
  ins_pipe( pipe_slow );
%}
24819
// Scalar half-precision min/max for CPUs without AVX10.2. Needs an opmask and
// two XMM temporaries; dst is TEMP_DEF so it does not share a register with
// the inputs.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    XMMRegister rhs     = $src2$$XMMRegister;
    KRegister   scratch = $ktmp$$KRegister;
    XMMRegister tmp_a   = $xtmp1$$XMMRegister;
    XMMRegister tmp_b   = $xtmp2$$XMMRegister;
    // The ideal opcode tells the helper whether this is a min or a max.
    __ sminmax_fp16(this->ideal_Opcode(), result, lhs, rhs, scratch, tmp_a, tmp_b);
  %}
  ins_pipe( pipe_slow );
%}
24835
// Scalar half-precision fused multiply-add. dst is one of the multiplicands
// and also receives the result: dst = dst * src1 + src2.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc        = $dst$$XMMRegister;   // multiplicand in, result out
    XMMRegister multiplier = $src1$$XMMRegister;
    XMMRegister addend     = $src2$$XMMRegister;
    // Note the operand order expected by vfmadd132sh: (dst, addend, multiplier).
    __ vfmadd132sh(acc, addend, multiplier);
  %}
  ins_pipe( pipe_slow );
%}
24846
24847
// Packed half-precision square root (SqrtVHF), register operand.
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister operand = $src$$XMMRegister;
    __ evsqrtph(result, operand, vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
24858
// Packed half-precision square root (SqrtVHF) with the operand folded in as a
// memory load (matched through the VectorReinterpret of a LoadVector).
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ evsqrtph(result, $src$$Address, vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
24869
// Packed half-precision add/div/mul/sub, register operands. One rule covers
// all four ideal nodes; the ideal opcode is forwarded to evfp16ph.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    XMMRegister rhs     = $src2$$XMMRegister;
    int         vec_enc = vector_length_encoding(this);
    __ evfp16ph(this->ideal_Opcode(), result, lhs, rhs, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24884
24885
// Packed half-precision add/div/mul/sub with the second operand folded in as
// a memory load (matched through the VectorReinterpret of a LoadVector).
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    int         vec_enc = vector_length_encoding(this);
    __ evfp16ph(this->ideal_Opcode(), result, lhs, $src2$$Address, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24900
// Packed half-precision fused multiply-add, register operands. dst is one of
// the multiplicands and also receives the result: dst = dst * src1 + src2.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc        = $dst$$XMMRegister;   // multiplicand in, result out
    XMMRegister multiplier = $src1$$XMMRegister;
    XMMRegister addend     = $src2$$XMMRegister;
    // Note the operand order expected by evfmadd132ph: (dst, addend, multiplier).
    __ evfmadd132ph(acc, addend, multiplier, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
24911
// Packed half-precision fused multiply-add with the multiplier folded in as a
// memory load: dst = dst * [src1] + src2.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc    = $dst$$XMMRegister;   // multiplicand in, result out
    XMMRegister addend = $src2$$XMMRegister;
    // Note the operand order expected by evfmadd132ph: (dst, addend, multiplier).
    __ evfmadd132ph(acc, addend, $src1$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
24922
// Packed half-precision min/max for CPUs with AVX10.2, second operand folded
// in as a memory load. k0 is passed as the opmask argument.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    int         vec_enc = vector_length_encoding(this);
    // The ideal opcode tells the helper whether this is a min or a max.
    __ vminmax_fp16_avx10_2(this->ideal_Opcode(), result, lhs, $src2$$Address, k0, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24937
// Packed half-precision min/max for CPUs with AVX10.2, register operands.
// k0 is passed as the opmask argument.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    XMMRegister rhs     = $src2$$XMMRegister;
    int         vec_enc = vector_length_encoding(this);
    // The ideal opcode tells the helper whether this is a min or a max.
    __ vminmax_fp16_avx10_2(this->ideal_Opcode(), result, lhs, rhs, k0, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24952
// Packed half-precision min/max for CPUs without AVX10.2. Needs an opmask and
// two vector temporaries; dst is TEMP_DEF so it does not share a register
// with the inputs.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister in1     = $src1$$XMMRegister;
    XMMRegister in2     = $src2$$XMMRegister;
    KRegister   scratch = $ktmp$$KRegister;
    XMMRegister tmp_a   = $xtmp1$$XMMRegister;
    XMMRegister tmp_b   = $xtmp2$$XMMRegister;
    int         vec_enc = vector_length_encoding(this);
    // NOTE(review): the inputs are handed over as (src2, src1), i.e. swapped
    // relative to the match rule and the format string. Presumably harmless
    // for min/max -- confirm against vminmax_fp16 before changing the order.
    __ vminmax_fp16(this->ideal_Opcode(), result, in2, in1, scratch, tmp_a, tmp_b, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24968
24969 //----------PEEPHOLE RULES-----------------------------------------------------
24970 // These must follow all instruction definitions as they use the names
24971 // defined in the instructions definitions.
24972 //
24973 // peeppredicate ( rule_predicate );
// // the predicate that must hold for the peephole rule to be applied;
// // if it does not hold, the rule is ignored
24975 //
24976 // peepmatch ( root_instr_name [preceding_instruction]* );
24977 //
24978 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture dependent peephole file. The method has the
// // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, a function that, when invoked, returns a new
// // node of the kind defined in peepreplace, and the rules of the nodes appearing
// // in the corresponding peepmatch. The function returns true if successful,
// // else it returns false
24987 //
24988 // peepconstraint %{
24989 // (instruction_number.operand_name relational_op instruction_number.operand_name
24990 // [, ...] );
24991 // // instruction numbers are zero-based using left to right order in peepmatch
24992 //
24993 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
24994 // // provide an instruction_number.operand_name for each operand that appears
24995 // // in the replacement instruction's match rule
24996 //
24997 // ---------VM FLAGS---------------------------------------------------------
24998 //
24999 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25000 //
25001 // Each peephole rule is given an identifying number starting with zero and
25002 // increasing by one in the order seen by the parser. An individual peephole
25003 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25004 // on the command-line.
25005 //
25006 // ---------CURRENT LIMITATIONS----------------------------------------------
25007 //
25008 // Only transformations inside a basic block (do we need more for peephole)
25009 //
25010 // ---------EXAMPLE----------------------------------------------------------
25011 //
25012 // // pertinent parts of existing instructions in architecture description
25013 // instruct movI(rRegI dst, rRegI src)
25014 // %{
25015 // match(Set dst (CopyI src));
25016 // %}
25017 //
25018 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25019 // %{
25020 // match(Set dst (AddI dst src));
25021 // effect(KILL cr);
25022 // %}
25023 //
25024 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25025 // %{
25026 // match(Set dst (AddI dst src));
25027 // %}
25028 //
25029 // 1. Simple replacement
25030 // - Only match adjacent instructions in same basic block
25031 // - Only equality constraints
25032 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25033 // - Only one replacement instruction
25034 //
25035 // // Change (inc mov) to lea
25036 // peephole %{
25037 // // lea should only be emitted when beneficial
25038 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25039 // // increment preceded by register-register move
25040 // peepmatch ( incI_rReg movI );
25041 // // require that the destination register of the increment
25042 // // match the destination register of the move
25043 // peepconstraint ( 0.dst == 1.dst );
25044 // // construct a replacement instruction that sets
25045 // // the destination to ( move's source register + one )
25046 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25047 // %}
25048 //
25049 // 2. Procedural replacement
// - More flexible finding of relevant nodes
25051 // - More flexible constraints
25052 // - More flexible transformations
25053 // - May utilise architecture-dependent API more effectively
25054 // - Currently only one replacement instruction due to adlc parsing capabilities
25055 //
25056 // // Change (inc mov) to lea
25057 // peephole %{
25058 // // lea should only be emitted when beneficial
25059 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25060 // // the rule numbers of these nodes inside are passed into the function below
25061 // peepmatch ( incI_rReg movI );
25062 // // the method that takes the responsibility of transformation
25063 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that creates this node
// // when invoked is passed into the function above
25066 // peepreplace ( leaI_rReg_immI() );
25067 // %}
25068
// These instructions are not matched by the matcher but are used by the peephole rules
// 32-bit "dst = src1 + src2" expressed as a lea. predicate(false) keeps the
// matcher from ever selecting this rule.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    // rbp and r13 are avoided as the base register (presumably because they
    // need an explicit displacement in the encoding -- confirm); if src1 is
    // one of them, the roles of the two sources are exchanged.
    const bool lhs_ok_as_base = (lhs != rbp && lhs != r13);
    Register base  = lhs_ok_as_base ? lhs : rhs;
    Register index = lhs_ok_as_base ? rhs : lhs;
    assert(lhs_ok_as_base || (rhs != rbp && rhs != r13), "");
    __ leal($dst$$Register, Address(base, index, Address::times_1));
  %}
  ins_pipe(ialu_reg_reg);
%}
25088
// 32-bit "dst = src1 + constant" expressed as a lea with a displacement.
// predicate(false) keeps the matcher from ever selecting this rule.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base   = $src1$$Register;
    __ leal(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25099
// 32-bit "dst = src << shift" expressed as a lea, with the shift count used
// as the address scale factor. predicate(false) keeps the matcher from ever
// selecting this rule.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Register result  = $dst$$Register;
    Register operand = $src$$Register;
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    // A doubling can be written as [src + src], unless src is rbp or r13,
    // which are avoided as the base register.
    const bool as_self_add = (factor == Address::times_2) && (operand != rbp && operand != r13);
    if (!as_self_add) {
      __ leal(result, Address(noreg, operand, factor));
    } else {
      __ leal(result, Address(operand, operand, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25116
// 64-bit "dst = src1 + src2" expressed as a lea. predicate(false) keeps the
// matcher from ever selecting this rule.
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    // rbp and r13 are avoided as the base register (presumably because they
    // need an explicit displacement in the encoding -- confirm); if src1 is
    // one of them, the roles of the two sources are exchanged.
    const bool lhs_ok_as_base = (lhs != rbp && lhs != r13);
    Register base  = lhs_ok_as_base ? lhs : rhs;
    Register index = lhs_ok_as_base ? rhs : lhs;
    assert(lhs_ok_as_base || (rhs != rbp && rhs != r13), "");
    __ leaq($dst$$Register, Address(base, index, Address::times_1));
  %}
  ins_pipe(ialu_reg_reg);
%}
25135
// 64-bit "dst = src1 + 32-bit constant" expressed as a lea with a
// displacement. predicate(false) keeps the matcher from ever selecting this rule.
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base   = $src1$$Register;
    __ leaq(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25146
// 64-bit "dst = src << shift" expressed as a lea, with the shift count used
// as the address scale factor. predicate(false) keeps the matcher from ever
// selecting this rule.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Register result  = $dst$$Register;
    Register operand = $src$$Register;
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    // A doubling can be written as [src + src], unless src is rbp or r13,
    // which are avoided as the base register.
    const bool as_self_add = (factor == Address::times_2) && (operand != rbp && operand != r13);
    if (!as_self_add) {
      __ leaq(result, Address(noreg, operand, factor));
    } else {
      __ leaq(result, Address(operand, operand, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25163
25164 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25165 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25166 // processors with at least partial ALU support for lea
25167 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25168 // beneficial for processors with full ALU support
25169 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25170
// int add (reg, reg): when the CPU has fast 2-operand lea, let
// lea_coalesce_reg try to replace the addI_rReg with a leaI_rReg_rReg_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}
25178
// int add (reg, imm): when the CPU has fast 2-operand lea, let
// lea_coalesce_imm try to replace the addI_rReg_imm with a leaI_rReg_immI_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25186
// int increment: only on CPUs with fast 3-operand lea or on Intel Cascade
// Lake, let lea_coalesce_imm try to replace the incI_rReg with a
// leaI_rReg_immI_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25195
// int decrement: only on CPUs with fast 3-operand lea or on Intel Cascade
// Lake, let lea_coalesce_imm try to replace the decI_rReg with a
// leaI_rReg_immI_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25204
// int left shift by a small immediate: when the CPU has fast 2-operand lea,
// let lea_coalesce_imm try to replace the salI_rReg_immI2 with a
// leaI_rReg_immI2_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}
25212
// long add (reg, reg): when the CPU has fast 2-operand lea, let
// lea_coalesce_reg try to replace the addL_rReg with a leaL_rReg_rReg_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}
25220
// long add (reg, imm): when the CPU has fast 2-operand lea, let
// lea_coalesce_imm try to replace the addL_rReg_imm with a
// leaL_rReg_immL32_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25228
// long increment: only on CPUs with fast 3-operand lea or on Intel Cascade
// Lake, let lea_coalesce_imm try to replace the incL_rReg with a
// leaL_rReg_immL32_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25237
// long decrement: only on CPUs with fast 3-operand lea or on Intel Cascade
// Lake, let lea_coalesce_imm try to replace the decL_rReg with a
// leaL_rReg_immL32_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25246
// long left shift by a small immediate: when the CPU has fast 2-operand lea,
// let lea_coalesce_imm try to replace the salL_rReg_immI2 with a
// leaL_rReg_immI2_peep.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}
25254
// Runs the lea_remove_redundant procedure on leaPCompressedOopOffset nodes.
// There is no peeppredicate and no peepreplace: the rule is not CPU-gated and
// names no replacement instruction.
peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}
25260
// Runs the lea_remove_redundant procedure on leaP8Narrow nodes.
// There is no peeppredicate and no peepreplace: the rule is not CPU-gated and
// names no replacement instruction.
peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}
25266
// Runs the lea_remove_redundant procedure on leaP32Narrow nodes.
// There is no peeppredicate and no peepreplace: the rule is not CPU-gated and
// names no replacement instruction.
peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}
25272
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the previous instruction.
25275
// int variant: hands each testI_reg to test_may_remove, which decides whether
// the test can be dropped. No peepreplace is given, so no replacement
// instruction is named.
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}
25282
// long variant: hands each testL_reg to test_may_remove, which decides whether
// the test can be dropped. No peepreplace is given, so no replacement
// instruction is named.
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}
25289
25290
25291 //----------SMARTSPILL RULES---------------------------------------------------
25292 // These must follow all instruction definitions as they use the names
25293 // defined in the instructions definitions.